All of lore.kernel.org
 help / color / mirror / Atom feed
* [pvops-dom0] Adding MCA logging support in pv_ops
@ 2009-07-30  5:15 Ke, Liping
  2009-07-30  8:24 ` Jan Beulich
  2009-07-30  9:57 ` Christoph Egger
  0 siblings, 2 replies; 26+ messages in thread
From: Ke, Liping @ 2009-07-30  5:15 UTC (permalink / raw)
  To: 'jeremy@goop.org'
  Cc: 'xen-devel@lists.xensource.com', Keir Fraser

[-- Attachment #1: Type: text/plain, Size: 694 bytes --]

Hi, Jeremy and Keir

This patch is backport from DOM0 cs902

When an MCE/CMCI error happens (or by polling), the related error
information will be sent to DOM0 by XEN. This patch will help to fetch
the xen-logged information by hypercall and then convert XEN-format
log into Linux format MCELOG. It makes using current available mcelog
tools for native Linux possible.
 
With this patch, after mce/cmci error log information is sent to DOM0,
running mcelog tools in DOM0, you will get same detailed decoded mce
information as in Native Linux.
 
Signed-Off-By: Liping Ke <liping.ke@intel.com>
Signed-Off-By: Yunhong Jiang <yunhong.jiang@intel.com>


Thanks& Regards,
Criping

[-- Attachment #2: 0001-Add-MCA-Logging-support-in-DOM0.patch --]
[-- Type: application/octet-stream, Size: 23213 bytes --]

From ed2b777210a95da5f6fb947919a34160ba8ed117 Mon Sep 17 00:00:00 2001
From: root <root@lke-ep.sh.intel.com>
Date: Thu, 30 Jul 2009 11:16:03 +0800
Subject: [PATCH] Add MCA Logging support in DOM0

This patch is backport from DOM0 cs902

When an MCE/CMCI error happens (or by polling), the related error
information will be sent to DOM0 by XEN. This patch will help to fetch
the xen-logged information by hypercall and then convert XEN-format
log into Linux format MCELOG. It makes using current available mcelog
tools for native Linux possible.

With this patch, after mce/cmci error log information is sent to DOM0,
running mcelog tools in DOM0, you will get same detailed decoded mce
information as in Native Linux.

Signed-Off-By: Liping Ke <liping.ke@intel.com>
Signed-Off-By: Yunhong Jiang <yunhong.jiang@intel.com>
---
 arch/x86/include/asm/xen/hypercall.h |    8 +
 arch/x86/kernel/cpu/mcheck/mce_64.c  |    1 +
 arch/x86/xen/enlighten.c             |    4 +-
 drivers/xen/Kconfig                  |    4 +
 drivers/xen/Makefile                 |    3 +-
 drivers/xen/mce_dom0.c               |  155 +++++++++++++
 include/xen/interface/xen-mca.h      |  422 ++++++++++++++++++++++++++++++++++
 7 files changed, 595 insertions(+), 2 deletions(-)
 create mode 100644 drivers/xen/mce_dom0.c
 create mode 100644 include/xen/interface/xen-mca.h

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3da450b..42a5601 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -46,6 +46,7 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/platform.h>
+#include <xen/interface/xen-mca.h>
 
 /*
  * The hypercall asms have to meet several constraints:
@@ -307,6 +308,13 @@ HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
 }
 
 static inline int
+HYPERVISOR_mca(struct xen_mc *mc_op)
+{
+	mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+	return _hypercall1(int, mca, mc_op);
+}
+
+static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
 	return _hypercall2(int, set_debugreg, reg, value);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 80ec191..96e396d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -35,6 +35,7 @@
 #include <asm/uaccess.h>
 #include <asm/smp.h>
 #include <asm/idle.h>
+#include <asm/xen/hypervisor.h>
 
 #define MISC_MCELOG_MINOR 227
 
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e099e44..11f3f4c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -212,11 +212,13 @@ static __init void xen_init_cpuid_mask(void)
 {
 	unsigned int ax, bx, cx, dx;
 
+	/* If we have PV guest MCE support, enable MCA */
+#if !defined(CONFIG_XEN_MCE)
 	cpuid_leaf1_edx_mask =
 		~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
 		  (1 << X86_FEATURE_MCA)  |  /* disable MCA */
 		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
-
+#endif
 	if (!xen_initial_domain())
 		cpuid_leaf1_edx_mask &=
 			~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index d315f6b..8e200ed 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -87,6 +87,10 @@ config XEN_SYS_HYPERVISOR
 config XEN_XENBUS_FRONTEND
        tristate
 
+config XEN_MCE
+	bool "Enable Xen MCE support in privileged domain"
+	depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
+
 config XEN_GNTDEV
 	bool "userspace grant access device driver"
 	depends on XEN
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 007aa99..6d8c54e 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_GNTDEV)	+= gntdev.o
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
 obj-$(CONFIG_XENFS)			+= xenfs/
-obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
\ No newline at end of file
+obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
+obj-$(CONFIG_XEN_MCE)	+= mce_dom0.o
diff --git a/drivers/xen/mce_dom0.c b/drivers/xen/mce_dom0.c
new file mode 100644
index 0000000..71d0d66
--- /dev/null
+++ b/drivers/xen/mce_dom0.c
@@ -0,0 +1,155 @@
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <xen/interface/xen.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/interface/vcpu.h>
+#include <asm/xen/hypercall.h>
+#include <asm/mce.h>
+
+static int convert_log(struct mc_info *mi)
+{
+	struct mcinfo_common *mic = NULL;
+	struct mcinfo_global *mc_global;
+	struct mcinfo_bank *mc_bank;
+	struct mce m;
+
+	x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
+	if (mic == NULL)
+	{
+		printk(KERN_ERR "DOM0_MCE_LOG: global data is NULL\n");
+		return -1;
+	}
+
+	mce_setup(&m);
+	mc_global = (struct mcinfo_global*)mic;
+	m.mcgstatus = mc_global->mc_gstatus;
+	m.cpu = mc_global->mc_coreid;/*for test*/
+	x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK);
+	do
+	{
+		if (mic == NULL || mic->size == 0)
+			break;
+		if (mic->type == MC_TYPE_BANK)
+		{
+			mc_bank = (struct mcinfo_bank*)mic;
+			m.misc = mc_bank->mc_misc;
+			m.status = mc_bank->mc_status;
+			m.addr = mc_bank->mc_addr;
+			m.tsc = mc_bank->mc_tsc;
+			m.res1 = mc_bank->mc_ctrl2;
+			m.bank = mc_bank->mc_bank;
+			printk(KERN_DEBUG "[CPU%d, BANK%d, addr %llx, state %llx]\n", 
+						m.bank, m.cpu, m.addr, m.status);
+			/*log this record*/
+			mce_log(&m);
+		}
+		mic = x86_mcinfo_next(mic);
+	}while (1);
+
+	return 0;
+}
+
+static mc_info_t *g_mi;
+
+/*dom0 mce virq handler, logging physical mce error info*/
+
+static irqreturn_t mce_dom0_interrupt(int irq, void *dev_id)
+{
+	xen_mc_t mc_op;
+	int result = 0;
+
+	printk(KERN_DEBUG "MCE_DOM0_LOG: enter dom0 mce vIRQ handler\n");
+	mc_op.cmd = XEN_MC_fetch;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
+urgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_URGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+	{
+		printk(KERN_DEBUG "MCE_DOM0_LOG: No more urgent data\n");
+		goto nonurgent;
+	}
+	else
+	{
+		result = convert_log(g_mi);
+		if (result) {
+			printk(KERN_ERR "MCE_DOM0_LOG: Log conversion failed\n");
+			goto end;
+		}
+		/* After fetching the telem from DOM0, we need to dec the telem's
+		 * refcnt and release the entry. The telem is reserved and inc
+		 * refcnt when filling the telem.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+
+		goto urgent;
+	}
+nonurgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+	{
+		printk(KERN_DEBUG "MCE_DOM0_LOG: No more nonurgent data\n");
+		goto end;
+	}
+	else
+	{
+		result = convert_log(g_mi);
+		if (result) {
+			printk(KERN_ERR "MCE_DOM0_LOG: Log conversion failed\n");
+			goto end;
+		}
+		/* After fetching the telem from DOM0, we need to dec the telem's
+		 * refcnt and release the entry. The telem is reserved and inc
+		 * refcnt when filling the telem.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+
+		goto nonurgent;
+	}
+end:
+	return IRQ_HANDLED;
+}
+
+static void bind_virq_for_mce(void)
+{
+	int ret;
+
+	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0, 
+		mce_dom0_interrupt, 0, "mce", NULL);
+
+	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
+	if (ret < 0)
+		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+}
+
+static int __init mcelog_init(void)
+{
+	/* Only Controlled Domain is responsible for MCE logging */
+	if (xen_initial_domain()) {
+		printk(KERN_DEBUG "MCE: bind virq for DOM0 Logging\n");
+		bind_virq_for_mce();
+	}
+	return 0;
+
+}
+
+
+static void __exit mcelog_cleanup(void)
+{
+	if (g_mi)
+		kfree(g_mi);
+}
+module_init(mcelog_init);
+module_exit(mcelog_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
new file mode 100644
index 0000000..8ddd3c3
--- /dev/null
+++ b/include/xen/interface/xen-mca.h
@@ -0,0 +1,422 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ * 
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen free's and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; out interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */
+#define XEN_MC_NONURGENT  0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */
+#define XEN_MC_URGENT     0x0002
+/* IN: Dom0 acknowledges previosly-fetched telemetry */
+#define XEN_MC_ACK        0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall an other
+ *  (most likely) correctable error happened. The fetched data,
+ *  does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecure:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ * Dom0 and DomU: Uncorrectable errors are reported via nmi handlers
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+#define MC_TYPE_RECOVERY        3
+
+struct mcinfo_common {
+    uint16_t type;      /* structure type */
+    uint16_t size;      /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+#define MC_FLAG_RECOVERABLE	(1 << 2)
+#define MC_FLAG_POLLED		(1 << 3)
+#define MC_FLAG_RESET		(1 << 4)
+#define MC_FLAG_CMCI		(1 << 5)
+#define MC_FLAG_MCE		(1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+    struct mcinfo_common common;
+
+    /* running domain at the time in error (most likely the impacted one) */
+    uint16_t mc_domid;
+    uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+    uint32_t mc_socketid; /* physical socket of the physical core */
+    uint16_t mc_coreid; /* physical impacted core */
+    uint16_t mc_core_threadid; /* core thread of physical core */
+    uint32_t mc_apicid;
+    uint32_t mc_flags;
+    uint64_t mc_gstatus; /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+    struct mcinfo_common common;
+
+    uint16_t mc_bank; /* bank nr */
+    uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0
+                        * and if mc_addr is valid. Never valid on DomU. */
+    uint64_t mc_status; /* bank status */
+    uint64_t mc_addr;   /* bank address, only valid
+                         * if addr bit is set in mc_status */
+    uint64_t mc_misc;
+    uint64_t mc_ctrl2;
+    uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+    uint64_t reg;   /* MSR */
+    uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */ 
+struct mcinfo_extended {
+    struct mcinfo_common common;
+
+    /* You can fill up to five registers.
+     * If you need more, then use this structure
+     * multiple times. */
+
+    uint32_t mc_msrs; /* Number of msr with valid values. */
+    /*
+     * Currently Intel extended MSR (32/64) include all gp registers
+     * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
+     * useful at present. So expand this array to 16/32 to leave room.
+     */
+    struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some case */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different Recovery Action types, if the action is performed successfully,
+ * REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* Below interface used between XEN/DOM0 for passing XEN's recovery action 
+ * information to DOM0. 
+ * usage Senario: After offlining broken page, XEN might pass its page offline
+ * recovery action result to DOM0. DOM0 will save the information in 
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easy broken page earlier when doing next reboot.
+*/
+struct page_offline_action
+{
+    /* Params for passing the offlined page number to DOM0 */
+    uint64_t mfn;
+    uint64_t status;
+};
+
+struct cpu_offline_action
+{
+    /* Params for passing the identity of the offlined CPU to DOM0 */
+    uint32_t mc_socketid;
+    uint16_t mc_coreid;
+    uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mcinfo_recovery
+{
+    struct mcinfo_common common;
+    uint16_t mc_bank; /* bank nr */
+    uint8_t action_flags;
+    uint8_t action_types;
+    union {
+        struct page_offline_action page_retire;
+        struct cpu_offline_action cpu_offline;
+        uint8_t pad[MAX_UNION_SIZE];
+    } action_info;
+};
+
+
+#define MCINFO_HYPERCALLSIZE	1024
+#define MCINFO_MAXSIZE		768
+
+struct mc_info {
+    /* Number of mcinfo_* entries in mi_data */
+    uint32_t mi_nentries;
+    uint32_t _pad0;
+    uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
+};
+typedef struct mc_info mc_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(mc_info);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS	7	/* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX	0	/* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX	1	/* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM	2	/* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX	3	/* Linux-defined */
+#define MC_CAPS_STD_ECX	4	/* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA	5	/* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX	6	/* cpuid level 0x80000001 (%ecx) */
+
+struct mcinfo_logical_cpu {
+    uint32_t mc_cpunr;          
+    uint32_t mc_chipid; 
+    uint16_t mc_coreid;
+    uint16_t mc_threadid;
+    uint32_t mc_apicid;
+    uint32_t mc_clusterid;
+    uint32_t mc_ncores;
+    uint32_t mc_ncores_active;
+    uint32_t mc_nthreads;
+    int32_t mc_cpuid_level;
+    uint32_t mc_family;
+    uint32_t mc_vendor;
+    uint32_t mc_model;
+    uint32_t mc_step;
+    char mc_vendorid[16];
+    char mc_brandid[64];
+    uint32_t mc_cpu_caps[MC_NCAPS];
+    uint32_t mc_cache_size;
+    uint32_t mc_cache_alignment;
+    int32_t mc_nmsrvals;
+    struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+};
+typedef struct mcinfo_logical_cpu xen_mc_logical_cpu_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_logical_cpu);
+
+
+/* 
+ * OS's should use these instead of writing their own lookup function
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi)    \
+    (_mi)->mi_nentries
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    ((struct mcinfo_common *)(_mi)->mi_data)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype:
+ *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+#define x86_mcinfo_lookup(_ret, _mi, _type)    \
+    do {                                                        \
+        uint32_t found, i;                                      \
+        struct mcinfo_common *_mic;                             \
+                                                                \
+        found = 0;                                              \
+	(_ret) = NULL;						\
+	if (_mi == NULL) break;					\
+        _mic = x86_mcinfo_first(_mi);                           \
+        for (i = 0; i < x86_mcinfo_nentries(_mi); i++) {        \
+            if (_mic->type == (_type)) {                        \
+                found = 1;                                      \
+                break;                                          \
+            }                                                   \
+            _mic = x86_mcinfo_next(_mic);                       \
+        }                                                       \
+        (_ret) = found ? _mic : NULL;                           \
+    } while (0)
+
+
+/* Usecase 1
+ * Register machine check trap callback handler
+ *    (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch            1
+struct xen_mc_fetch {
+    /* IN/OUT variables. */
+    uint32_t flags;	/* IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+                           XEN_MC_ACK if ack'ing an earlier fetch */
+			/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED,
+			   XEN_MC_NODATA, XEN_MC_NOMATCH */
+    uint32_t _pad0;
+    uint64_t fetch_id;	/* OUT: id for ack, IN: id we are ack'ing */
+
+    /* OUT variables. */
+    GUEST_HANDLE(mc_info) data;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain     2
+struct xen_mc_notifydomain {
+    /* IN variables. */
+    uint16_t mc_domid;    /* The unprivileged domain to notify. */
+    uint16_t mc_vcpuid;   /* The vcpu in mc_domid to notify.
+                           * Usually echo'd value from the fetch hypercall. */
+
+    /* IN/OUT variables. */
+    uint32_t flags;
+
+/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
+
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+	/* IN/OUT */
+	uint32_t ncpus;
+	uint32_t _pad0;
+	/* OUT */
+	GUEST_HANDLE(xen_mc_logical_cpu) info;
+};
+
+#define XEN_MC_msrinject    4
+#define MC_MSRINJ_MAXMSRS       8
+struct xen_mc_msrinject {
+       /* IN */
+	uint32_t mcinj_cpunr;           /* target processor id */
+	uint32_t mcinj_flags;           /* see MC_MSRINJ_F_* below */
+	uint32_t mcinj_count;           /* 0 .. count-1 in array are valid */
+	uint32_t _pad0;
+	struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE   0x1
+
+#define XEN_MC_mceinject    5
+struct xen_mc_mceinject {
+	unsigned int mceinj_cpunr;      /* target processor id */
+};
+
+struct xen_mc {
+    uint32_t cmd;
+    uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+    union {
+        struct xen_mc_fetch        mc_fetch;
+        struct xen_mc_notifydomain mc_notifydomain;
+        struct xen_mc_physcpuinfo  mc_physcpuinfo;
+        struct xen_mc_msrinject    mc_msrinject;
+        struct xen_mc_mceinject    mc_mceinject;
+    } u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
-- 
1.5.1


[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in pv_ops
  2009-07-30  5:15 [pvops-dom0] Adding MCA logging support in pv_ops Ke, Liping
@ 2009-07-30  8:24 ` Jan Beulich
  2009-07-30  8:45   ` Ke, Liping
  2009-07-31 15:19   ` Andi Kleen
  2009-07-30  9:57 ` Christoph Egger
  1 sibling, 2 replies; 26+ messages in thread
From: Jan Beulich @ 2009-07-30  8:24 UTC (permalink / raw)
  To: Liping Ke; +Cc: Jeremy Fitzhardinge, xen-devel, Keir Fraser

>>> "Ke, Liping" <liping.ke@intel.com> 30.07.09 07:15 >>>
>This patch is backport from DOM0 cs902

What base kernel version is this against? I noticed that in .31 struct mce
doesn't have a res1 field anymore, and hence there's no place to
(compatibly) store mc_bank->mc_ctrl2. Do you have any plans on how
to address this? Additionally, the new use of the space previously
consumed by the res1 field seems to put under question whether that
was actually forward compatible (namely, whether it might confuse
newer user space tools).

Thanks, Jan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-07-30  8:24 ` Jan Beulich
@ 2009-07-30  8:45   ` Ke, Liping
  2009-07-30  9:23     ` Ke, Liping
  2009-07-31 15:19   ` Andi Kleen
  1 sibling, 1 reply; 26+ messages in thread
From: Ke, Liping @ 2009-07-30  8:45 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Jeremy Fitzhardinge, xen-devel@lists.xensource.com, Keir Fraser

Hi,  Jan& Jeremy

Thanks for the feedback. This is back port from 2.6.18 old DOM0 Kernel.

And also, I got some feedbacks from Andi Kleen. I will modify all of them and resend it tomorrow.

Thanks a lot!
Criping

Jan Beulich wrote:
>>>> "Ke, Liping" <liping.ke@intel.com> 30.07.09 07:15 >>>
>> This patch is backport from DOM0 cs902
> 
> What base kernel version is this against? I noticed that in .31
> struct mce doesn't have a res1 field anymore, and hence there's no
> place to (compatibly) store mc_bank->mc_ctrl2. Do you have any plans
> on how 
> to address this? Additionally, the new use of the space previously
> consumed by the res1 field seems to put under question whether that
> was actually forward compatible (namely, whether it might confuse
> newer user space tools).
> 
> Thanks, Jan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-07-30  8:45   ` Ke, Liping
@ 2009-07-30  9:23     ` Ke, Liping
  2009-07-30 17:14       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 26+ messages in thread
From: Ke, Liping @ 2009-07-30  9:23 UTC (permalink / raw)
  To: Ke, Liping, Jan Beulich
  Cc: Jeremy Fitzhardinge, xen-devel@lists.xensource.com, Keir Fraser

Hi, Jan

I also noticed that in .31, several important missing field such as apic_id, 
socket id is added. Those field is better to be filled too.
Since now pv_ops is based on .30, those changes haven't happen yet.

So what's your suggestion? Seems we have to keep the old code and make
a patch to it when .31 is supported in pv_ops?

Thanks& Regards,
Criping



Ke, Liping wrote:
> Hi,  Jan& Jeremy
> 
> Thanks for the feedback. This is back port from 2.6.18 old DOM0
> Kernel. 
> 
> And also, I got some feedbacks from Andi Kleen. I will modify all of
> them and resend it tomorrow. 
> 
> Thanks a lot!
> Criping
> 
> Jan Beulich wrote:
>>>>> "Ke, Liping" <liping.ke@intel.com> 30.07.09 07:15 >>>
>>> This patch is backport from DOM0 cs902
>> 
>> What base kernel version is this against? I noticed that in .31
>> struct mce doesn't have a res1 field anymore, and hence there's no
>> place to (compatibly) store mc_bank->mc_ctrl2. Do you have any plans
>> on how to address this? Additionally, the new use of the space
>> previously consumed by the res1 field seems to put under question
>> whether that was actually forward compatible (namely, whether it
>> might confuse newer user space tools). 
>> 
>> Thanks, Jan
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in pv_ops
  2009-07-30  5:15 [pvops-dom0] Adding MCA logging support in pv_ops Ke, Liping
  2009-07-30  8:24 ` Jan Beulich
@ 2009-07-30  9:57 ` Christoph Egger
  2009-08-05  1:57   ` Ke, Liping
  1 sibling, 1 reply; 26+ messages in thread
From: Christoph Egger @ 2009-07-30  9:57 UTC (permalink / raw)
  To: xen-devel; +Cc: jeremy, Keir Fraser, Ke, Liping


Hi Ke Liping!

The xen mca public header needs some comment updates in the xen tree first
before it populates to the guests:

In line 102, uncorrectable errors are not reported via nmi handlers.
Please update the comment to match the code.

In struct mcinfo_recovery, it is not obvious that the REC_ACTION_* flags are 
intended to be used in action_flags. Please add a comment to make this clear.

In struct mcinfo_recovery, it is not obvious that the MC_ACTION_* flags are
intended to be used in action_types. Actually are these flags?
These are defined as a bitfield, but the union can't store more than one 
action. Please add a comment to clarify this.

For xen_mc_notifydomain, it is not clear if this has to be used before or 
after acknowledging the fetched telemetry. Please update the comment
to clarify this.

In xen_mc_notifydomain, the comment this the flags field may contain 
XEN_MC_TRAP which doesn't exist. Please update the comment.

Christoph



On Thursday 30 July 2009 07:15:05 Ke, Liping wrote:
> Hi, Jeremy and Keir
>
> This patch is backport from DOM0 cs902
>
> When an MCE/CMCI error happens (or by polling), the related error
> information will be sent to DOM0 by XEN. This patch will help to fetch
> the xen-logged information by hypercall and then convert XEN-format
> log into Linux format MCELOG. It makes using current available mcelog
> tools for native Linux possible.
>
> With this patch, after mce/cmci error log information is sent to DOM0,
> running mcelog tools in DOM0, you will get same detailed decoded mce
> information as in Native Linux.
>
> Signed-Off-By: Liping Ke <liping.ke@intel.com>
> Signed-Off-By: Yunhong Jiang <yunhong.jiang@intel.com>
>
>
> Thanks& Regards,
> Criping



-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Thomas M. McCoy, Giuliano Meroni
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-07-30  9:23     ` Ke, Liping
@ 2009-07-30 17:14       ` Jeremy Fitzhardinge
  2009-08-05  2:13         ` Ke, Liping
  2009-08-05  2:54         ` Ke, Liping
  0 siblings, 2 replies; 26+ messages in thread
From: Jeremy Fitzhardinge @ 2009-07-30 17:14 UTC (permalink / raw)
  To: Ke, Liping; +Cc: xen-devel@lists.xensource.com, Keir Fraser, Jan Beulich

On 07/30/09 02:23, Ke, Liping wrote:
> Hi, Jan
>
> I also noticed that in .31, several important missing field such as apic_id, 
> socket id is added. Those field is better to be filled too.
> Since now pv_ops is based on .30, those changes haven't happen yet.
>
> So what's your suggestion? Seems we have to keep the old code and make
> a patch to it when .31 is supported in pv_ops?
>   

The "rebase/master" branch is based on 2.6.31-rc4.  It looks like your
patch doesn't really have any other code-level dependencies on the dom0
changes, so you could just base it on upstream 2.6.31-rc4.

    J

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in pv_ops
  2009-07-30  8:24 ` Jan Beulich
  2009-07-30  8:45   ` Ke, Liping
@ 2009-07-31 15:19   ` Andi Kleen
  1 sibling, 0 replies; 26+ messages in thread
From: Andi Kleen @ 2009-07-31 15:19 UTC (permalink / raw)
  To: Jan Beulich; +Cc: Jeremy Fitzhardinge, xen-devel, Keir Fraser, Liping Ke

"Jan Beulich" <JBeulich@novell.com> writes:

> Additionally, the new use of the space previously
> consumed by the res1 field seems to put under question whether that
> was actually forward compatible (namely, whether it might confuse
> newer user space tools).

It will, either by logging junk fields or if you're unlucky (and e.g. broke
the CPUID field) completely wrong output.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in pv_ops
  2009-07-30  9:57 ` Christoph Egger
@ 2009-08-05  1:57   ` Ke, Liping
  2009-08-05  9:16     ` Christoph Egger
  0 siblings, 1 reply; 26+ messages in thread
From: Ke, Liping @ 2009-08-05  1:57 UTC (permalink / raw)
  To: Christoph Egger, xen-devel@lists.xensource.com
  Cc: jeremy@goop.org, Keir Fraser

Hi, Christoph
Please see my below comments.

And also, I found some interfaces are different in pv_ops kernel such as 
GUEST_HANDLE related. Seems we can't keep the same copy of common
file between XEN and GUEST.  We have to do slight changes to the XEN 
file before copying it to guest kernel.

And also, for the header file, I modified a little according to Andi's feedback
such as gigantic macros will be unacceptable according to kernel code 
conventions, etc. So I modify x86_mcinfo_lookup into inline function.

I will resend the new patch to all of you for further feedback. After the patch 
is accepted, I will sync the modified head file back to XEN for consistency.

Thanks a lot for your help!
Criping



Christoph Egger wrote:
> Hi Ke Liping!
> 
> The xen mca public header needs some comment updates in the xen tree
> first before it populates to the guests:
Yes and thanks!
> 
> In line 102, uncorrectable errors are not reported via nmi handlers.
> Please update the comment to match the code.
> 
OK, I will modify this comment

> In struct mcinfo_recovery, it is not obvious that the REC_ACTION_*
> flags are intended to be used in action_flags. Please add a comment
> to make this clear. 
> 
> In struct mcinfo_recovery, it is not obvious that the MC_ACTION_*
> flags are intended to be used in action_types. Actually are these
> flags? 

OK, I will add comments on this

> These are defined as a bitfield, but the union can't store more than
> one action. Please add a comment to clarify this.
> 

Currently, We only support one action per bank. Do you have requirement
on multiple actions per bank?

> For xen_mc_notifydomain, it is not clear if this has to be used
> before or after acknowledging the fetched telemetry. Please update
> the comment 
> to clarify this.
> 
> In xen_mc_notifydomain, the comment this the flags field may contain
> XEN_MC_TRAP which doesn't exist. Please update the comment.
> 
According to the latest do_mca hypercall implementation, seems
XEN_MC_notifydomain is unsupported, so I never use this hypercall.
Also, I did not see XEN_MC_CORRECTABLE definition in current code too. 
So I will remove both of them in comments here. 
When this hypercall is implemented, the author should update this comments here?

> Christoph
> 
> 
> 
> On Thursday 30 July 2009 07:15:05 Ke, Liping wrote:
>> Hi, Jeremy and Keir
>> 
>> This patch is backport from DOM0 cs902
>> 
>> When an MCE/CMCI error happens (or by polling), the related error
>> information will be sent to DOM0 by XEN. This patch will help to
>> fetch the xen-logged information by hypercall and then convert
>> XEN-format log into Linux format MCELOG. It makes using current
>> available mcelog tools for native Linux possible.
>> 
>> With this patch, after mce/cmci error log information is sent to
>> DOM0, running mcelog tools in DOM0, you will get same detailed
>> decoded mce information as in Native Linux.
>> 
>> Signed-Off-By: Liping Ke <liping.ke@intel.com>
>> Signed-Off-By: Yunhong Jiang <yunhong.jiang@intel.com>
>> 
>> 
>> Thanks& Regards,
>> Criping

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-07-30 17:14       ` Jeremy Fitzhardinge
@ 2009-08-05  2:13         ` Ke, Liping
  2009-08-05 22:55           ` Jeremy Fitzhardinge
  2009-08-05  2:54         ` Ke, Liping
  1 sibling, 1 reply; 26+ messages in thread
From: Ke, Liping @ 2009-08-05  2:13 UTC (permalink / raw)
  To: Jeremy Fitzhardinge, Jan Beulich, Christoph Egger, Kleen, Andi
  Cc: Keir, xen-devel@lists.xensource.com, Fraser

[-- Attachment #1: Type: text/plain, Size: 1262 bytes --]

Hi, all
I modified the patch according to all feedbacks.
Mainly changes inlcudes:
1. Rebased code to "rebase/master" branch 2.6.31-rc4. We fetch more
    useful physical cpu information which is useful to mcelog tools.
2. Modified the code for defects/ kernel conventions according to feedback.
3. Update out of date comments and add neccessary comments according
   to feedback.

This patch is only for your review. I will resend the formated patch later.
Also, I will re-sync the xen-mca.h head file changes back to XEN after review.

Thanks a lot for your help!
Criping

Jeremy Fitzhardinge wrote:
> On 07/30/09 02:23, Ke, Liping wrote:
>> Hi, Jan
>> 
>> I also noticed that in .31, several important missing field such as
>> apic_id, socket id is added. Those field is better to be filled too.
>> Since now pv_ops is based on .30, those changes haven't happen yet.
>> 
>> So what's your suggestion? Seems we have to keep the old code and
>> make 
>> a patch to it when .31 is supported in pv_ops?
>> 
> 
> The "rebase/master" branch is based on 2.6.31-rc4.  It looks like your
> patch doesn't really have any other code-level dependencies on the
> dom0 changes, so you could just base it on upstream 2.6.31-rc4.
> 
>     J


[-- Attachment #2: mce_pvops.patch --]
[-- Type: application/octet-stream, Size: 23422 bytes --]

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3da450b..42a5601 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -46,6 +46,7 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/platform.h>
+#include <xen/interface/xen-mca.h>
 
 /*
  * The hypercall asms have to meet several constraints:
@@ -307,6 +308,13 @@ HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
 }
 
 static inline int
+HYPERVISOR_mca(struct xen_mc *mc_op)
+{
+	mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+	return _hypercall1(int, mca, mc_op);
+}
+
+static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
 	return _hypercall2(int, set_debugreg, reg, value);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 78970be..b6a9f3b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -217,13 +217,17 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 static __init void xen_init_cpuid_mask(void)
 {
 	unsigned int ax, bx, cx, dx;
-
+#if !defined(CONFIG_XEN_MCE)
 	cpuid_leaf1_edx_mask =
 		~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
 		  (1 << X86_FEATURE_MCA)  |  /* disable MCA */
 		  (1 << X86_FEATURE_PAT)  |  /* disable PAT */
 		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
-
+#else
+	cpuid_leaf1_edx_mask =
+		~((1 << X86_FEATURE_PAT)  |  /* disable PAT */
+		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+#endif
 	cpuid_leaf81_edx_mask = ~(1 << (X86_FEATURE_GBPAGES % 32));
 
 	if (!xen_initial_domain())
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 3b1c421..c627fee 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -88,6 +88,10 @@ config XEN_SYS_HYPERVISOR
 config XEN_XENBUS_FRONTEND
        tristate
 
+config XEN_MCE
+       def_bool y
+       depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
+
 config XEN_S3
        def_bool y
-       depends on XEN_DOM0 && ACPI
\ No newline at end of file
+       depends on XEN_DOM0 && ACPI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 386c775..c893102 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_S3)			+= acpi.o
\ No newline at end of file
+obj-$(CONFIG_XEN_MCE)	+= mce_dom0.o
+obj-$(CONFIG_XEN_S3)			+= acpi.o
diff --git a/drivers/xen/mce_dom0.c b/drivers/xen/mce_dom0.c
new file mode 100644
index 0000000..064616d
--- /dev/null
+++ b/drivers/xen/mce_dom0.c
@@ -0,0 +1,218 @@
+/******************************************************************************
+ * mce_dom0.c
+ * Add Machine Check event Logging support in DOM0
+ *
+ * Driver for receiving and logging machine check event
+ *
+ * Copyright (c) 2008, 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <xen/interface/xen.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/interface/vcpu.h>
+#include <asm/xen/hypercall.h>
+#include <asm/mce.h>
+
+static mc_info_t *g_mi;
+static mcinfo_logical_cpu_t *g_physinfo;
+static uint32_t ncpus;
+
+static int convert_log(struct mc_info *mi)
+{
+	struct mcinfo_common *mic = NULL;
+	struct mcinfo_global *mc_global;
+	struct mcinfo_bank *mc_bank;
+	struct mce m;
+	int i, found = 0;
+
+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
+	WARN_ON(!mic);
+
+	mce_setup(&m);
+	mc_global = (struct mcinfo_global*)mic;
+	m.mcgstatus = mc_global->mc_gstatus;
+	m.apicid = mc_global->mc_apicid;
+	for (i = 0; i < ncpus; i++ ) {
+		if (g_physinfo[i].mc_apicid ==  m.apicid)
+		{
+			found = 1;
+			break;
+		}
+	}
+	WARN_ON(!found);
+
+	m.socketid = g_physinfo[i].mc_chipid;
+	m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
+	m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
+	do
+	{
+		if (mic == NULL || mic->size == 0)
+			break;
+		if (mic->type == MC_TYPE_BANK)
+		{
+			mc_bank = (struct mcinfo_bank*)mic;
+			m.misc = mc_bank->mc_misc;
+			m.status = mc_bank->mc_status;
+			m.addr = mc_bank->mc_addr;
+			m.tsc = mc_bank->mc_tsc;
+			m.bank = mc_bank->mc_bank;
+			/*log this record*/
+			mce_log(&m);
+		}
+		mic = x86_mcinfo_next(mic);
+	}
+	while (1);
+
+	return 0;
+}
+
+/*dom0 mce virq handler, logging physical mce error info*/
+static irqreturn_t mce_dom0_interrupt(int irq, void *dev_id)
+{
+	xen_mc_t mc_op;
+	int result = 0;
+
+	mc_op.cmd = XEN_MC_fetch;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
+urgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_URGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+		goto nonurgent;
+	else
+	{
+		result = convert_log(g_mi);
+		if (result)
+			goto end;
+		/* After fetching the error event log entry from DOM0, we need to dec
+		 * the refcnt and release the entry. The entry is reserved and inc
+		 * refcnt when filling the error log entry.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+		goto urgent;
+	}
+nonurgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+		goto end;
+	else
+	{
+		result = convert_log(g_mi);
+		if (result)
+			goto end;
+		/* After fetching the error event log entry from DOM0, we need to dec
+		 * the refcnt and release the entry. The entry is reserved and inc
+		 * refcnt when filling the error log entry.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+		goto nonurgent;
+	}
+end:
+	return IRQ_HANDLED;
+}
+
+static int bind_virq_for_mce(void)
+{
+	int ret;
+	xen_mc_t mc_op;
+
+	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+		mce_dom0_interrupt, 0, "mce", NULL);
+
+	if (ret < 0) {
+		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+		return ret;
+	}
+
+	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
+
+	if(!g_mi)
+		return -ENOMEM;
+
+	/* Fetch physical CPU Numbers */
+	mc_op.cmd = XEN_MC_physcpuinfo;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPU numbers\n");
+		kfree(g_mi);
+		return ret;
+	}
+
+	/* Fetch each CPU Physical Info for later reference*/
+	ncpus = mc_op.u.mc_physcpuinfo.ncpus;
+	g_physinfo = kmalloc(sizeof(struct mcinfo_logical_cpu)*ncpus, GFP_KERNEL);
+	if (!g_physinfo) {
+		kfree(g_mi);
+		return -ENOMEM;
+	}
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPUs info\n");
+		kfree(g_mi);
+		kfree(g_physinfo);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __init mcelog_init(void)
+{
+	/* Only DOM0 is responsible for MCE logging */
+	if (xen_initial_domain())
+		return bind_virq_for_mce();
+
+	return 0;
+}
+
+
+static void __exit mcelog_cleanup(void)
+{
+	if (g_mi)
+		kfree(g_mi);
+	if (g_physinfo)
+		kfree(g_physinfo);
+}
+module_init(mcelog_init);
+module_exit(mcelog_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
new file mode 100644
index 0000000..182f33c
--- /dev/null
+++ b/include/xen/interface/xen-mca.h
@@ -0,0 +1,424 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ * 
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen free's and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; out interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
+#define XEN_MC_NONURGENT  0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent error log entry */
+#define XEN_MC_URGENT     0x0002
+/* IN: Dom0 acknowledges previosly-fetched error log entry */
+#define XEN_MC_ACK        0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall an other
+ *  (most likely) correctable error happened. The fetched data,
+ *  does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecure:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+#define MC_TYPE_RECOVERY        3
+
+struct mcinfo_common {
+    uint16_t type;      /* structure type */
+    uint16_t size;      /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+#define MC_FLAG_RECOVERABLE	(1 << 2)
+#define MC_FLAG_POLLED		(1 << 3)
+#define MC_FLAG_RESET		(1 << 4)
+#define MC_FLAG_CMCI		(1 << 5)
+#define MC_FLAG_MCE		(1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+    struct mcinfo_common common;
+
+    /* running domain at the time in error (most likely the impacted one) */
+    uint16_t mc_domid;
+    uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+    uint32_t mc_socketid; /* physical socket of the physical core */
+    uint16_t mc_coreid; /* physical impacted core */
+    uint16_t mc_core_threadid; /* core thread of physical core */
+    uint32_t mc_apicid;
+    uint32_t mc_flags;
+    uint64_t mc_gstatus; /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+    struct mcinfo_common common;
+
+    uint16_t mc_bank; /* bank nr */
+    uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0
+                        * and if mc_addr is valid. Never valid on DomU. */
+    uint64_t mc_status; /* bank status */
+    uint64_t mc_addr;   /* bank address, only valid
+                         * if addr bit is set in mc_status */
+    uint64_t mc_misc;
+    uint64_t mc_ctrl2;
+    uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+    uint64_t reg;   /* MSR */
+    uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */ 
+struct mcinfo_extended {
+    struct mcinfo_common common;
+
+    /* You can fill up to five registers.
+     * If you need more, then use this structure
+     * multiple times. */
+
+    uint32_t mc_msrs; /* Number of msr with valid values. */
+    /*
+     * Currently Intel extended MSR (32/64) include all gp registers
+     * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
+     * useful at present. So expand this array to 16/32 to leave room.
+     */
+    struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some case */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different Recovery Action types, if the action is performed successfully,
+ * REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* Below interface used between XEN/DOM0 for passing XEN's recovery action 
+ * information to DOM0. 
+ * usage Senario: After offlining broken page, XEN might pass its page offline
+ * recovery action result to DOM0. DOM0 will save the information in 
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easy broken page earlier when doing next reboot.
+*/
+struct page_offline_action
+{
+    /* Params for passing the offlined page number to DOM0 */
+    uint64_t mfn;
+    uint64_t status;
+};
+
+struct cpu_offline_action
+{
+    /* Params for passing the identity of the offlined CPU to DOM0 */
+    uint32_t mc_socketid;
+    uint16_t mc_coreid;
+    uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mcinfo_recovery
+{
+    struct mcinfo_common common;
+    uint16_t mc_bank; /* bank nr */
+    /* Recovery Action Flags defined above such as REC_ACTION_DONE */
+    uint8_t action_flags;
+    /* Recovery Action types defined above such as MC_ACTION_PAGE_OFFLINE */
+    uint8_t action_types;
+    union {
+        struct page_offline_action page_retire;
+        struct cpu_offline_action cpu_offline;
+        uint8_t pad[MAX_UNION_SIZE];
+    } action_info;
+};
+
+
+#define MCINFO_HYPERCALLSIZE	1024
+#define MCINFO_MAXSIZE		768
+
+struct mc_info {
+    /* Number of mcinfo_* entries in mi_data */
+    uint32_t mi_nentries;
+    uint32_t _pad0;
+    uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
+};
+typedef struct mc_info mc_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(mc_info);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS	7	/* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX	0	/* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX	1	/* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM	2	/* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX	3	/* Linux-defined */
+#define MC_CAPS_STD_ECX	4	/* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA	5	/* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX	6	/* cpuid level 0x80000001 (%ecx) */
+
+struct mcinfo_logical_cpu {
+    uint32_t mc_cpunr;          
+    uint32_t mc_chipid; 
+    uint16_t mc_coreid;
+    uint16_t mc_threadid;
+    uint32_t mc_apicid;
+    uint32_t mc_clusterid;
+    uint32_t mc_ncores;
+    uint32_t mc_ncores_active;
+    uint32_t mc_nthreads;
+    int32_t mc_cpuid_level;
+    uint32_t mc_family;
+    uint32_t mc_vendor;
+    uint32_t mc_model;
+    uint32_t mc_step;
+    char mc_vendorid[16];
+    char mc_brandid[64];
+    uint32_t mc_cpu_caps[MC_NCAPS];
+    uint32_t mc_cache_size;
+    uint32_t mc_cache_alignment;
+    int32_t mc_nmsrvals;
+    struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+};
+typedef struct mcinfo_logical_cpu mcinfo_logical_cpu_t;
+DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
+
+
+/* 
+ * OS's should use these instead of writing their own lookup function
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi)    \
+    (_mi)->mi_nentries
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    ((struct mcinfo_common *)(_mi)->mi_data)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype:
+ *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+
+static inline void x86_mcinfo_lookup
+			(struct mcinfo_common **ret, struct mc_info *mi, uint16_t type)
+{
+	uint32_t found = 0, i;
+	struct mcinfo_common *mic;
+
+	*ret = NULL;
+	if ( !mi )
+		return;
+	mic = x86_mcinfo_first(mi);
+
+	for ( i = 0; i < x86_mcinfo_nentries(mi); i++ ) {
+		if ( mic->type == type) {
+			found = 1;
+			break;
+		}
+		mic = x86_mcinfo_next(mic);
+	}
+
+	*ret = found? mic : NULL;
+}
+/* Usecase 1
+ * Register machine check trap callback handler
+ *    (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch            1
+struct xen_mc_fetch {
+    /* IN/OUT variables. */
+    uint32_t flags;	/* IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+                           XEN_MC_ACK if ack'ing an earlier fetch */
+			/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED,
+			   XEN_MC_NODATA, XEN_MC_NOMATCH */
+    uint32_t _pad0;
+    uint64_t fetch_id;	/* OUT: id for ack, IN: id we are ack'ing */
+
+    /* OUT variables. */
+    GUEST_HANDLE(mc_info) data;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain     2
+struct xen_mc_notifydomain {
+    /* IN variables. */
+    uint16_t mc_domid;    /* The unprivileged domain to notify. */
+    uint16_t mc_vcpuid;   /* The vcpu in mc_domid to notify.
+                           * Usually echo'd value from the fetch hypercall. */
+
+    /* IN/OUT variables. */
+    uint32_t flags;
+
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
+
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+	/* IN/OUT */
+	uint32_t ncpus;
+	uint32_t _pad0;
+	/* OUT */
+	GUEST_HANDLE(mcinfo_logical_cpu) info;
+};
+
+#define XEN_MC_msrinject    4
+#define MC_MSRINJ_MAXMSRS       8
+struct xen_mc_msrinject {
+       /* IN */
+	uint32_t mcinj_cpunr;           /* target processor id */
+	uint32_t mcinj_flags;           /* see MC_MSRINJ_F_* below */
+	uint32_t mcinj_count;           /* 0 .. count-1 in array are valid */
+	uint32_t _pad0;
+	struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE   0x1
+
+#define XEN_MC_mceinject    5
+struct xen_mc_mceinject {
+	unsigned int mceinj_cpunr;      /* target processor id */
+};
+
+struct xen_mc {
+    uint32_t cmd;
+    uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+    union {
+        struct xen_mc_fetch        mc_fetch;
+        struct xen_mc_notifydomain mc_notifydomain;
+        struct xen_mc_physcpuinfo  mc_physcpuinfo;
+        struct xen_mc_msrinject    mc_msrinject;
+        struct xen_mc_mceinject    mc_mceinject;
+    } u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-07-30 17:14       ` Jeremy Fitzhardinge
  2009-08-05  2:13         ` Ke, Liping
@ 2009-08-05  2:54         ` Ke, Liping
  2009-08-05  2:57           ` Ke, Liping
  1 sibling, 1 reply; 26+ messages in thread
From: Ke, Liping @ 2009-08-05  2:54 UTC (permalink / raw)
  To: Jeremy Fitzhardinge, Jan Beulich, Christoph Egger, Kleen, Andi,
	Keir Fraser

seems the mail is missing so resend it. Thanks!

Hi, all
I modified the patch according to all feedbacks.
Mainly changes inlcudes:
1. Rebased code to "rebase/master" branch 2.6.31-rc4. We fetch more
    useful physical cpu information which is useful to mcelog tools.
2. Modified the code for defects/ kernel conventions according to feedback.
3. Update out of date comments and add neccessary comments according
   to feedback.

This patch is only for your review. I will resend the formated patch later.
Also, I will re-sync the xen-mca.h head file changes back to XEN after review.

Thanks a lot for your help!
Criping

Jeremy Fitzhardinge wrote:
> On 07/30/09 02:23, Ke, Liping wrote:
>> Hi, Jan
>> 
>> I also noticed that in .31, several important missing field such as
>> apic_id, socket id is added. Those field is better to be filled too.
>> Since now pv_ops is based on .30, those changes haven't happen yet.
>> 
>> So what's your suggestion? Seems we have to keep the old code and
>> make 
>> a patch to it when .31 is supported in pv_ops?
>> 
> 
> The "rebase/master" branch is based on 2.6.31-rc4.  It looks like your
> patch doesn't really have any other code-level dependencies on the
> dom0 changes, so you could just base it on upstream 2.6.31-rc4.
> 
>     J

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-08-05  2:54         ` Ke, Liping
@ 2009-08-05  2:57           ` Ke, Liping
  0 siblings, 0 replies; 26+ messages in thread
From: Ke, Liping @ 2009-08-05  2:57 UTC (permalink / raw)
  To: Ke, Liping, Jeremy Fitzhardinge, Jan Beulich, Christoph Egger,
	Kleen, Andi

[-- Attachment #1: Type: text/plain, Size: 1556 bytes --]

Attachment.

Ke, Liping wrote:
> seems the mail is missing so resend it. Thanks!
> 
> Hi, all
> I modified the patch according to all feedbacks.
> Mainly changes inlcudes:
> 1. Rebased code to "rebase/master" branch 2.6.31-rc4. We fetch more
>     useful physical cpu information which is useful to mcelog tools.
> 2. Modified the code for defects/ kernel conventions according to
> feedback. 
> 3. Update out of date comments and add neccessary comments according
>    to feedback.
> 
> This patch is only for your review. I will resend the formated patch
> later. Also, I will re-sync the xen-mca.h head file changes back to
> XEN after review. 
> 
> Thanks a lot for your help!
> Criping
> 
> Jeremy Fitzhardinge wrote:
>> On 07/30/09 02:23, Ke, Liping wrote:
>>> Hi, Jan
>>> 
>>> I also noticed that in .31, several important missing field such as
>>> apic_id, socket id is added. Those field is better to be filled too.
>>> Since now pv_ops is based on .30, those changes haven't happen yet.
>>> 
>>> So what's your suggestion? Seems we have to keep the old code and
>>> make a patch to it when .31 is supported in pv_ops?
>>> 
>> 
>> The "rebase/master" branch is based on 2.6.31-rc4.  It looks like
>> your patch doesn't really have any other code-level dependencies on
>> the dom0 changes, so you could just base it on upstream 2.6.31-rc4.
>> 
>>     J
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel


[-- Attachment #2: mce_pvops.patch --]
[-- Type: application/octet-stream, Size: 23422 bytes --]

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3da450b..42a5601 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -46,6 +46,7 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/platform.h>
+#include <xen/interface/xen-mca.h>
 
 /*
  * The hypercall asms have to meet several constraints:
@@ -307,6 +308,13 @@ HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
 }
 
 static inline int
+HYPERVISOR_mca(struct xen_mc *mc_op)
+{
+	mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+	return _hypercall1(int, mca, mc_op);
+}
+
+static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
 	return _hypercall2(int, set_debugreg, reg, value);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 78970be..b6a9f3b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -217,13 +217,17 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 static __init void xen_init_cpuid_mask(void)
 {
 	unsigned int ax, bx, cx, dx;
-
+#if !defined(CONFIG_XEN_MCE)
 	cpuid_leaf1_edx_mask =
 		~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
 		  (1 << X86_FEATURE_MCA)  |  /* disable MCA */
 		  (1 << X86_FEATURE_PAT)  |  /* disable PAT */
 		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
-
+#else
+	cpuid_leaf1_edx_mask =
+		~((1 << X86_FEATURE_PAT)  |  /* disable PAT */
+		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+#endif
 	cpuid_leaf81_edx_mask = ~(1 << (X86_FEATURE_GBPAGES % 32));
 
 	if (!xen_initial_domain())
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 3b1c421..c627fee 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -88,6 +88,10 @@ config XEN_SYS_HYPERVISOR
 config XEN_XENBUS_FRONTEND
        tristate
 
+config XEN_MCE
+       def_bool y
+       depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
+
 config XEN_S3
        def_bool y
-       depends on XEN_DOM0 && ACPI
\ No newline at end of file
+       depends on XEN_DOM0 && ACPI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 386c775..c893102 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_S3)			+= acpi.o
\ No newline at end of file
+obj-$(CONFIG_XEN_MCE)	+= mce_dom0.o
+obj-$(CONFIG_XEN_S3)			+= acpi.o
diff --git a/drivers/xen/mce_dom0.c b/drivers/xen/mce_dom0.c
new file mode 100644
index 0000000..064616d
--- /dev/null
+++ b/drivers/xen/mce_dom0.c
@@ -0,0 +1,218 @@
+/******************************************************************************
+ * mce_dom0.c
+ * Add Machine Check event Logging support in DOM0
+ *
+ * Driver for receiving and logging machine check event
+ *
+ * Copyright (c) 2008, 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <xen/interface/xen.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/interface/vcpu.h>
+#include <asm/xen/hypercall.h>
+#include <asm/mce.h>
+
+static mc_info_t *g_mi;
+static mcinfo_logical_cpu_t *g_physinfo;
+static uint32_t ncpus;
+
+static int convert_log(struct mc_info *mi)
+{
+	struct mcinfo_common *mic = NULL;
+	struct mcinfo_global *mc_global;
+	struct mcinfo_bank *mc_bank;
+	struct mce m;
+	int i, found = 0;
+
+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
+	WARN_ON(!mic);
+
+	mce_setup(&m);
+	mc_global = (struct mcinfo_global*)mic;
+	m.mcgstatus = mc_global->mc_gstatus;
+	m.apicid = mc_global->mc_apicid;
+	for (i = 0; i < ncpus; i++ ) {
+		if (g_physinfo[i].mc_apicid ==  m.apicid)
+		{
+			found = 1;
+			break;
+		}
+	}
+	WARN_ON(!found);
+
+	m.socketid = g_physinfo[i].mc_chipid;
+	m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
+	m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
+	do
+	{
+		if (mic == NULL || mic->size == 0)
+			break;
+		if (mic->type == MC_TYPE_BANK)
+		{
+			mc_bank = (struct mcinfo_bank*)mic;
+			m.misc = mc_bank->mc_misc;
+			m.status = mc_bank->mc_status;
+			m.addr = mc_bank->mc_addr;
+			m.tsc = mc_bank->mc_tsc;
+			m.bank = mc_bank->mc_bank;
+			/*log this record*/
+			mce_log(&m);
+		}
+		mic = x86_mcinfo_next(mic);
+	}
+	while (1);
+
+	return 0;
+}
+
+/*dom0 mce virq handler, logging physical mce error info*/
+static irqreturn_t mce_dom0_interrupt(int irq, void *dev_id)
+{
+	xen_mc_t mc_op;
+	int result = 0;
+
+	mc_op.cmd = XEN_MC_fetch;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
+urgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_URGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+		goto nonurgent;
+	else
+	{
+		result = convert_log(g_mi);
+		if (result)
+			goto end;
+		/* After fetching the error event log entry from DOM0, we need to dec
+		 * the refcnt and release the entry. The entry is reserved and inc
+		 * refcnt when filling the error log entry.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+		goto urgent;
+	}
+nonurgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+		goto end;
+	else
+	{
+		result = convert_log(g_mi);
+		if (result)
+			goto end;
+		/* After fetching the error event log entry from DOM0, we need to dec
+		 * the refcnt and release the entry. The entry is reserved and inc
+		 * refcnt when filling the error log entry.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+		goto nonurgent;
+	}
+end:
+	return IRQ_HANDLED;
+}
+
+static int bind_virq_for_mce(void)
+{
+	int ret;
+	xen_mc_t mc_op;
+
+	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+		mce_dom0_interrupt, 0, "mce", NULL);
+
+	if (ret < 0) {
+		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+		return ret;
+	}
+
+	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
+
+	if(!g_mi)
+		return -ENOMEM;
+
+	/* Fetch physical CPU Numbers */
+	mc_op.cmd = XEN_MC_physcpuinfo;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPU numbers\n");
+		kfree(g_mi);
+		return ret;
+	}
+
+	/* Fetch each CPU Physical Info for later reference*/
+	ncpus = mc_op.u.mc_physcpuinfo.ncpus;
+	g_physinfo = kmalloc(sizeof(struct mcinfo_logical_cpu)*ncpus, GFP_KERNEL);
+	if (!g_physinfo) {
+		kfree(g_mi);
+		return -ENOMEM;
+	}
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPUs info\n");
+		kfree(g_mi);
+		kfree(g_physinfo);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __init mcelog_init(void)
+{
+	/* Only DOM0 is responsible for MCE logging */
+	if (xen_initial_domain())
+		return bind_virq_for_mce();
+
+	return 0;
+}
+
+
+static void __exit mcelog_cleanup(void)
+{
+	if (g_mi)
+		kfree(g_mi);
+	if (g_physinfo)
+		kfree(g_physinfo);
+}
+module_init(mcelog_init);
+module_exit(mcelog_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
new file mode 100644
index 0000000..182f33c
--- /dev/null
+++ b/include/xen/interface/xen-mca.h
@@ -0,0 +1,424 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ * 
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen free's and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; out interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
+#define XEN_MC_NONURGENT  0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent error log entry */
+#define XEN_MC_URGENT     0x0002
+/* IN: Dom0 acknowledges previosly-fetched error log entry */
+#define XEN_MC_ACK        0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall an other
+ *  (most likely) correctable error happened. The fetched data,
+ *  does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecure:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+#define MC_TYPE_RECOVERY        3
+
+struct mcinfo_common {
+    uint16_t type;      /* structure type */
+    uint16_t size;      /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+#define MC_FLAG_RECOVERABLE	(1 << 2)
+#define MC_FLAG_POLLED		(1 << 3)
+#define MC_FLAG_RESET		(1 << 4)
+#define MC_FLAG_CMCI		(1 << 5)
+#define MC_FLAG_MCE		(1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+    struct mcinfo_common common;
+
+    /* running domain at the time in error (most likely the impacted one) */
+    uint16_t mc_domid;
+    uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+    uint32_t mc_socketid; /* physical socket of the physical core */
+    uint16_t mc_coreid; /* physical impacted core */
+    uint16_t mc_core_threadid; /* core thread of physical core */
+    uint32_t mc_apicid;
+    uint32_t mc_flags;
+    uint64_t mc_gstatus; /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+    struct mcinfo_common common;
+
+    uint16_t mc_bank; /* bank nr */
+    uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0
+                        * and if mc_addr is valid. Never valid on DomU. */
+    uint64_t mc_status; /* bank status */
+    uint64_t mc_addr;   /* bank address, only valid
+                         * if addr bit is set in mc_status */
+    uint64_t mc_misc;
+    uint64_t mc_ctrl2;
+    uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+    uint64_t reg;   /* MSR */
+    uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */ 
+struct mcinfo_extended {
+    struct mcinfo_common common;
+
+    /* You can fill up to five registers.
+     * If you need more, then use this structure
+     * multiple times. */
+
+    uint32_t mc_msrs; /* Number of msr with valid values. */
+    /*
+     * Currently Intel extended MSR (32/64) include all gp registers
+     * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
+     * useful at present. So expand this array to 16/32 to leave room.
+     */
+    struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some case */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different Recovery Action types, if the action is performed successfully,
+ * REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* Below interface used between XEN/DOM0 for passing XEN's recovery action 
+ * information to DOM0. 
+ * usage Senario: After offlining broken page, XEN might pass its page offline
+ * recovery action result to DOM0. DOM0 will save the information in 
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easy broken page earlier when doing next reboot.
+*/
+struct page_offline_action
+{
+    /* Params for passing the offlined page number to DOM0 */
+    uint64_t mfn;
+    uint64_t status;
+};
+
+struct cpu_offline_action
+{
+    /* Params for passing the identity of the offlined CPU to DOM0 */
+    uint32_t mc_socketid;
+    uint16_t mc_coreid;
+    uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mcinfo_recovery
+{
+    struct mcinfo_common common;
+    uint16_t mc_bank; /* bank nr */
+    /* Recovery Action Flags defined above such as REC_ACTION_DONE */
+    uint8_t action_flags;
+    /* Recovery Action types defined above such as MC_ACTION_PAGE_OFFLINE */
+    uint8_t action_types;
+    union {
+        struct page_offline_action page_retire;
+        struct cpu_offline_action cpu_offline;
+        uint8_t pad[MAX_UNION_SIZE];
+    } action_info;
+};
+
+
+#define MCINFO_HYPERCALLSIZE	1024
+#define MCINFO_MAXSIZE		768
+
+struct mc_info {
+    /* Number of mcinfo_* entries in mi_data */
+    uint32_t mi_nentries;
+    uint32_t _pad0;
+    uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
+};
+typedef struct mc_info mc_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(mc_info);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS	7	/* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX	0	/* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX	1	/* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM	2	/* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX	3	/* Linux-defined */
+#define MC_CAPS_STD_ECX	4	/* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA	5	/* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX	6	/* cpuid level 0x80000001 (%ecx) */
+
+struct mcinfo_logical_cpu {
+    uint32_t mc_cpunr;          
+    uint32_t mc_chipid; 
+    uint16_t mc_coreid;
+    uint16_t mc_threadid;
+    uint32_t mc_apicid;
+    uint32_t mc_clusterid;
+    uint32_t mc_ncores;
+    uint32_t mc_ncores_active;
+    uint32_t mc_nthreads;
+    int32_t mc_cpuid_level;
+    uint32_t mc_family;
+    uint32_t mc_vendor;
+    uint32_t mc_model;
+    uint32_t mc_step;
+    char mc_vendorid[16];
+    char mc_brandid[64];
+    uint32_t mc_cpu_caps[MC_NCAPS];
+    uint32_t mc_cache_size;
+    uint32_t mc_cache_alignment;
+    int32_t mc_nmsrvals;
+    struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+};
+typedef struct mcinfo_logical_cpu mcinfo_logical_cpu_t;
+DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
+
+
+/* 
+ * OS's should use these instead of writing their own lookup function
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi)    \
+    (_mi)->mi_nentries
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    ((struct mcinfo_common *)(_mi)->mi_data)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype:
+ *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+
+static inline void x86_mcinfo_lookup
+			(struct mcinfo_common **ret, struct mc_info *mi, uint16_t type)
+{
+	uint32_t found = 0, i;
+	struct mcinfo_common *mic;
+
+	*ret = NULL;
+	if ( !mi )
+		return;
+	mic = x86_mcinfo_first(mi);
+
+	for ( i = 0; i < x86_mcinfo_nentries(mi); i++ ) {
+		if ( mic->type == type) {
+			found = 1;
+			break;
+		}
+		mic = x86_mcinfo_next(mic);
+	}
+
+	*ret = found? mic : NULL;
+}
+/* Usecase 1
+ * Register machine check trap callback handler
+ *    (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch            1
+struct xen_mc_fetch {
+    /* IN/OUT variables. */
+    uint32_t flags;	/* IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+                           XEN_MC_ACK if ack'ing an earlier fetch */
+			/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED,
+			   XEN_MC_NODATA, XEN_MC_NOMATCH */
+    uint32_t _pad0;
+    uint64_t fetch_id;	/* OUT: id for ack, IN: id we are ack'ing */
+
+    /* OUT variables. */
+    GUEST_HANDLE(mc_info) data;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain     2
+struct xen_mc_notifydomain {
+    /* IN variables. */
+    uint16_t mc_domid;    /* The unprivileged domain to notify. */
+    uint16_t mc_vcpuid;   /* The vcpu in mc_domid to notify.
+                           * Usually echo'd value from the fetch hypercall. */
+
+    /* IN/OUT variables. */
+    uint32_t flags;
+
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
+
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+	/* IN/OUT */
+	uint32_t ncpus;
+	uint32_t _pad0;
+	/* OUT */
+	GUEST_HANDLE(mcinfo_logical_cpu) info;
+};
+
+#define XEN_MC_msrinject    4
+#define MC_MSRINJ_MAXMSRS       8
+struct xen_mc_msrinject {
+       /* IN */
+	uint32_t mcinj_cpunr;           /* target processor id */
+	uint32_t mcinj_flags;           /* see MC_MSRINJ_F_* below */
+	uint32_t mcinj_count;           /* 0 .. count-1 in array are valid */
+	uint32_t _pad0;
+	struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE   0x1
+
+#define XEN_MC_mceinject    5
+struct xen_mc_mceinject {
+	unsigned int mceinj_cpunr;      /* target processor id */
+};
+
+struct xen_mc {
+    uint32_t cmd;
+    uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+    union {
+        struct xen_mc_fetch        mc_fetch;
+        struct xen_mc_notifydomain mc_notifydomain;
+        struct xen_mc_physcpuinfo  mc_physcpuinfo;
+        struct xen_mc_msrinject    mc_msrinject;
+        struct xen_mc_mceinject    mc_mceinject;
+    } u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in pv_ops
  2009-08-05  1:57   ` Ke, Liping
@ 2009-08-05  9:16     ` Christoph Egger
  2009-08-06  1:37       ` Ke, Liping
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Egger @ 2009-08-05  9:16 UTC (permalink / raw)
  To: Ke, Liping; +Cc: jeremy@goop.org, xen-devel@lists.xensource.com, Keir Fraser

On Wednesday 05 August 2009 03:57:20 Ke, Liping wrote:
> Hi, Christoph
> Please see my below comments.
>
> And also, I found some interfaces are different in pv_ops kernel such as
> GUEST_HANDLE related. Seems we can't keep the same copy of common
> file between XEN and GUEST.

That's not a problem as long as the ABI doesn't change.

> We have to do slight changes to the XEN  file before copying it to guest
> kernel. 

Well, the comment updates I suppose.

> And also, for the header file, I modified a little according to Andi's
> feedback such as gigantic macros will be unacceptable according to kernel
> code conventions, etc. So I modify x86_mcinfo_lookup into inline function.

NetBSD also has some "local" guest header changes which aren't accepted
by Keir due to Xen conventions.
Keep in mind that you have to merge the headers whenever you sync up with
Xen.

> I will resend the new patch to all of you for further feedback. After the
> patch is accepted, I will sync the modified head file back to XEN for
> consistency.

Please practise friendly actions for non-Linux guests when changing
the headers. Changing the macros for only one guest isn't a friendly action
for all guests.

Please only sync back the comment updates.

If NetBSD, Solaris and Linux were trying to have all local changes in Xen 
headers, they would become a mess.

> Thanks a lot for your help!
> Criping
>
> Christoph Egger wrote:
> > Hi Ke Liping!
> >
> > The xen mca public header needs some comment updates in the xen tree
> > first before it populates to the guests:
>
> Yes and thanks!
>
> > In line 102, uncorrectable errors are not reported via nmi handlers.
> > Please update the comment to match the code.
>
> OK, I will modify this comment

Tnx.

> > In struct mcinfo_recovery, it is not obvious that the REC_ACTION_*
> > flags are intended to be used in action_flags. Please add a comment
> > to make this clear.
> >
> > In struct mcinfo_recovery, it is not obvious that the MC_ACTION_*
> > flags are intended to be used in action_types. Actually are these
> > flags?
>
> OK, I will add comments on this

Tnx.

> > These are defined as a bitfield, but the union can't store more than
> > one action. Please add a comment to clarify this.
>
> Currently, We only support one action per bank. Do you have requirement
> on multiple actions per bank?

Yes, because support for one action per bank is a limitation in the
(Intel specific) implementation but not in design.

> > For xen_mc_notifydomain, it is not clear if this has to be used
> > before or after acknowledging the fetched telemetry. Please update
> > the comment
> > to clarify this.
> >
> > In xen_mc_notifydomain, the comment this the flags field may contain
> > XEN_MC_TRAP which doesn't exist. Please update the comment.
>
> According to the latest do_mca hypercall implementation, seems
> XEN_MC_notifydomain is unsupported, so I never use this hypercall.
> Also, I did not see XEN_MC_CORRECTABLE definition in current code too.
> So I will remove both of them in comments here.
> When this hypercall is implemented, the author should update this comments
> here?

The notifydomain hypercall is currently broken due to the design and 
implementation changes between Xen 3.3 and Xen 3.4. It got broken
because noone adapted it to work with the new design.
It is a method for the dom0 to notify a domU.

Right, you don't use them in your Linux patch but that doesn't imply that 
others like Solaris don't use it.

Christoph

> > On Thursday 30 July 2009 07:15:05 Ke, Liping wrote:
> >> Hi, Jeremy and Keir
> >>
> >> This patch is backport from DOM0 cs902
> >>
> >> When an MCE/CMCI error happens (or by polling), the related error
> >> information will be sent to DOM0 by XEN. This patch will help to
> >> fetch the xen-logged information by hypercall and then convert
> >> XEN-format log into Linux format MCELOG. It makes using current
> >> available mcelog tools for native Linux possible.
> >>
> >> With this patch, after mce/cmci error log information is sent to
> >> DOM0, running mcelog tools in DOM0, you will get same detailed
> >> decoded mce information as in Native Linux.
> >>
> >> Signed-Off-By: Liping Ke <liping.ke@intel.com>
> >> Signed-Off-By: Yunhong Jiang <yunhong.jiang@intel.com>
> >>
> >>
> >> Thanks& Regards,
> >> Criping



-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Thomas M. McCoy, Giuliano Meroni
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-08-05  2:13         ` Ke, Liping
@ 2009-08-05 22:55           ` Jeremy Fitzhardinge
  2009-08-07  2:32             ` Ke, Liping
  0 siblings, 1 reply; 26+ messages in thread
From: Jeremy Fitzhardinge @ 2009-08-05 22:55 UTC (permalink / raw)
  To: Ke, Liping
  Cc: Kleen, Andi, Christoph Egger, xen-devel@lists.xensource.com,
	Keir Fraser, Jan Beulich

On 08/04/09 19:13, Ke, Liping wrote:
> Hi, all
> I modified the patch according to all feedbacks.
> Mainly changes inlcudes:
> 1. Rebased code to "rebase/master" branch 2.6.31-rc4. We fetch more
>     useful physical cpu information which is useful to mcelog tools.
> 2. Modified the code for defects/ kernel conventions according to feedback.
> 3. Update out of date comments and add neccessary comments according
>    to feedback.
>
> This patch is only for your review. I will resend the formated patch later.
> Also, I will re-sync the xen-mca.h head file changes back to XEN after review.
>   

Could you resubmit this as a properly formed Linux patch, including:

   1. a one-line summary to explain the purpose of the patch
   2. a more detailed discussion of the patch contents, suitable for use
      as a commit comment
   3. a signed-off-by line

Thanks,
    J

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in pv_ops
  2009-08-05  9:16     ` Christoph Egger
@ 2009-08-06  1:37       ` Ke, Liping
  2009-08-06  7:27         ` Christoph Egger
  0 siblings, 1 reply; 26+ messages in thread
From: Ke, Liping @ 2009-08-06  1:37 UTC (permalink / raw)
  To: Christoph Egger
  Cc: jeremy@goop.org, xen-devel@lists.xensource.com, Keir Fraser

Hi, Christoph
Since we can't keep the same copy of header files for pv-ops and XEN already,
When sync back the header to XEN, I will only sync back the 
modified comments and other required changes. As for the change for 
programming conventions (inline function vs micro), I will not put them 
back to XEN.  Is it OK for you?

As for the union structure for recovery action, since no one is using it
now,I plan firstly to add a comment on it, something like "If more than 
one kind of recovery action perbank permited, union structure need to 
be changed". How do you think about it?

Thanks a lot!
Criping

Christoph Egger wrote:
> On Wednesday 05 August 2009 03:57:20 Ke, Liping wrote:
>> Hi, Christoph
>> Please see my below comments.
>> 
>> And also, I found some interfaces are different in pv_ops kernel
>> such as GUEST_HANDLE related. Seems we can't keep the same copy of
>> common 
>> file between XEN and GUEST.
> 
> That's not a problem as long as the ABI doesn't change.
> 
>> We have to do slight changes to the XEN  file before copying it to
>> guest kernel.
> 
> Well, the comment updates I suppose.
> 
>> And also, for the header file, I modified a little according to
>> Andi's feedback such as gigantic macros will be unacceptable
>> according to kernel code conventions, etc. So I modify
>> x86_mcinfo_lookup into inline function. 
> 
> NetBSD also has some "local" guest header changes which aren't
> accepted 
> by Keir due to Xen conventions.
> Keep in mind that you have to merge the headers whenever you sync up
> with Xen.
> 
>> I will resend the new patch to all of you for further feedback.
>> After the patch is accepted, I will sync the modified head file back
>> to XEN for consistency.
> 
> Please practise friendly actions for non-Linux guests when changing
> the headers. Changing the macros for only one guest isn't a friendly
> action for all guests.
> 
> Please only sync back the comment updates.
> 
> If NetBSD, Solaris and Linux were trying to have all local changes in
> Xen headers, they would become a mess.
> 
>> Thanks a lot for your help!
>> Criping

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in pv_ops
  2009-08-06  1:37       ` Ke, Liping
@ 2009-08-06  7:27         ` Christoph Egger
  2009-08-06  8:49           ` Christoph Egger
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Egger @ 2009-08-06  7:27 UTC (permalink / raw)
  To: Ke, Liping; +Cc: jeremy@goop.org, xen-devel@lists.xensource.com, Keir Fraser

On Thursday 06 August 2009 03:37:39 Ke, Liping wrote:
> Hi, Christoph
> Since we can't keep the same copy of header files for pv-ops and XEN
> already, When sync back the header to XEN, I will only sync back the
> modified comments and other required changes. As for the change for
> programming conventions (inline function vs micro), I will not put them
> back to XEN.  Is it OK for you?

Yes, that's fine.

>
> As for the union structure for recovery action, since no one is using it
> now,I plan firstly to add a comment on it, something like "If more than
> one kind of recovery action perbank permited, union structure need to
> be changed". How do you think about it?

That's fine with me. Tnx.

Christoph

> Thanks a lot!
> Criping
>
> Christoph Egger wrote:
> > On Wednesday 05 August 2009 03:57:20 Ke, Liping wrote:
> >> Hi, Christoph
> >> Please see my below comments.
> >>
> >> And also, I found some interfaces are different in pv_ops kernel
> >> such as GUEST_HANDLE related. Seems we can't keep the same copy of
> >> common
> >> file between XEN and GUEST.
> >
> > That's not a problem as long as the ABI doesn't change.
> >
> >> We have to do slight changes to the XEN  file before copying it to
> >> guest kernel.
> >
> > Well, the comment updates I suppose.
> >
> >> And also, for the header file, I modified a little according to
> >> Andi's feedback such as gigantic macros will be unacceptable
> >> according to kernel code conventions, etc. So I modify
> >> x86_mcinfo_lookup into inline function.
> >
> > NetBSD also has some "local" guest header changes which aren't
> > accepted
> > by Keir due to Xen conventions.
> > Keep in mind that you have to merge the headers whenever you sync up
> > with Xen.
> >
> >> I will resend the new patch to all of you for further feedback.
> >> After the patch is accepted, I will sync the modified head file back
> >> to XEN for consistency.
> >
> > Please practise friendly actions for non-Linux guests when changing
> > the headers. Changing the macros for only one guest isn't a friendly
> > action for all guests.
> >
> > Please only sync back the comment updates.
> >
> > If NetBSD, Solaris and Linux were trying to have all local changes in
> > Xen headers, they would become a mess.
> >
> >> Thanks a lot for your help!
> >> Criping



-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Thomas M. McCoy, Giuliano Meroni
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in pv_ops
  2009-08-06  7:27         ` Christoph Egger
@ 2009-08-06  8:49           ` Christoph Egger
  2009-08-07  1:48             ` Ke, Liping
  0 siblings, 1 reply; 26+ messages in thread
From: Christoph Egger @ 2009-08-06  8:49 UTC (permalink / raw)
  To: xen-devel; +Cc: jeremy@goop.org, Keir Fraser, Ke, Liping

On Thursday 06 August 2009 09:27:04 Christoph Egger wrote:
> On Thursday 06 August 2009 03:37:39 Ke, Liping wrote:
> > Hi, Christoph
> > Since we can't keep the same copy of header files for pv-ops and XEN
> > already, When sync back the header to XEN, I will only sync back the
> > modified comments and other required changes. As for the change for
> > programming conventions (inline function vs micro), I will not put them
> > back to XEN.  Is it OK for you?
>
> Yes, that's fine.
>
> > As for the union structure for recovery action, since no one is using it
> > now,I plan firstly to add a comment on it, something like "If more than
> > one kind of recovery action perbank permited, union structure need to
> > be changed". How do you think about it?
>
> That's fine with me. Tnx.

Ah wait! I should drink some coffee and then start to think. :)
The way you think is not how multiple actions per bank work.
By design support for multiple actions per bank is available via
getting multiple recovery actions in the mc_data returned from the fetch
hypercall. The comment should mention that.

Christoph

> > Thanks a lot!
> > Criping
> >
> > Christoph Egger wrote:
> > > On Wednesday 05 August 2009 03:57:20 Ke, Liping wrote:
> > >> Hi, Christoph
> > >> Please see my below comments.
> > >>
> > >> And also, I found some interfaces are different in pv_ops kernel
> > >> such as GUEST_HANDLE related. Seems we can't keep the same copy of
> > >> common
> > >> file between XEN and GUEST.
> > >
> > > That's not a problem as long as the ABI doesn't change.
> > >
> > >> We have to do slight changes to the XEN  file before copying it to
> > >> guest kernel.
> > >
> > > Well, the comment updates I suppose.
> > >
> > >> And also, for the header file, I modified a little according to
> > >> Andi's feedback such as gigantic macros will be unacceptable
> > >> according to kernel code conventions, etc. So I modify
> > >> x86_mcinfo_lookup into inline function.
> > >
> > > NetBSD also has some "local" guest header changes which aren't
> > > accepted
> > > by Keir due to Xen conventions.
> > > Keep in mind that you have to merge the headers whenever you sync up
> > > with Xen.
> > >
> > >> I will resend the new patch to all of you for further feedback.
> > >> After the patch is accepted, I will sync the modified head file back
> > >> to XEN for consistency.
> > >
> > > Please practise friendly actions for non-Linux guests when changing
> > > the headers. Changing the macros for only one guest isn't a friendly
> > > action for all guests.
> > >
> > > Please only sync back the comment updates.
> > >
> > > If NetBSD, Solaris and Linux were trying to have all local changes in
> > > Xen headers, they would become a mess.
> > >
> > >> Thanks a lot for your help!
> > >> Criping



-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Thomas M. McCoy, Giuliano Meroni
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in pv_ops
  2009-08-06  8:49           ` Christoph Egger
@ 2009-08-07  1:48             ` Ke, Liping
  0 siblings, 0 replies; 26+ messages in thread
From: Ke, Liping @ 2009-08-07  1:48 UTC (permalink / raw)
  To: Christoph Egger, xen-devel@lists.xensource.com
  Cc: jeremy@goop.org, Keir Fraser

Christoph Egger wrote:
>>> As for the union structure for recovery action, since no one is
>>> using it now,I plan firstly to add a comment on it, something like
>>> "If more than one kind of recovery action perbank permited, union
>>> structure need to be changed". How do you think about it?
>> 
>> That's fine with me. Tnx.
> 
> Ah wait! I should drink some coffee and then start to think. :)
> The way you think is not how multiple actions per bank work.
> By design support for multiple actions per bank is available via
> getting multiple recovery actions in the mc_data returned from the
> fetch hypercall. The comment should mention that.
> 
Oh, we remember the discussion before. Yes, if multiple recovery
actions needed, the fetch hypercall could return mcinfo_recovery data array. 
I will modify the comments:)

Thanks a lot!
Criping

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-08-05 22:55           ` Jeremy Fitzhardinge
@ 2009-08-07  2:32             ` Ke, Liping
  2009-08-07 18:07               ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 26+ messages in thread
From: Ke, Liping @ 2009-08-07  2:32 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Kleen, Andi, Christoph Egger, xen-devel@lists.xensource.com,
	Keir Fraser, Jan Beulich

[-- Attachment #1: Type: text/plain, Size: 1115 bytes --]

Hi, Jeremy and all

This is the new formated patch and I have tried linux patch format check tools on it.

Thanks a lot!
Regards,
Criping


Jeremy Fitzhardinge wrote:
> On 08/04/09 19:13, Ke, Liping wrote:
>> Hi, all
>> I modified the patch according to all feedbacks.
>> Mainly changes inlcudes:
>> 1. Rebased code to "rebase/master" branch 2.6.31-rc4. We fetch more
>>     useful physical cpu information which is useful to mcelog tools.
>> 2. Modified the code for defects/ kernel conventions according to
>> feedback. 
>> 3. Update out of date comments and add neccessary comments according
>> to feedback. 
>> 
>> This patch is only for your review. I will resend the formated patch
>> later. Also, I will re-sync the xen-mca.h head file changes back to
>> XEN after review. 
>> 
> 
> Could you resubmit this as a properly formed Linux patch, including:
> 
>    1. a one-line summary to explain the purpose of the patch
>    2. a more detailed discussion of the patch contents, suitable for
>       use as a commit comment
>    3. a signed-off-by line
> 
> Thanks,
>     J


[-- Attachment #2: 0001-Add-MCA-Logging-support-in-DOM0.patch --]
[-- Type: application/octet-stream, Size: 24383 bytes --]

From e6c023daa7c10dfeafc0c80eafedae0e22d24281 Mon Sep 17 00:00:00 2001
From: Liping Ke <liping.ke@intel.com>
Date: Fri, 7 Aug 2009 10:21:44 +0800
Subject: [PATCH] Add MCA Logging support in DOM0

This patch is backport from DOM0(2.6.18) cs902
When an MCE/CMCI error happens (or by polling), the related error
information will be sent to DOM0 by XEN. This patch will help to fetch
the xen-logged information by hypercall and then convert XEN-format
log into Linux format MCELOG. It makes using current available mcelog
tools for native Linux possible.
With this patch, after mce/cmci error log information is sent to DOM0,
running mcelog tools in DOM0, you will get same detailed decoded mce
information as in Native Linux.

Signed-off-by: Liping Ke <liping.ke@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
---
 arch/x86/include/asm/xen/hypercall.h |    8 +
 arch/x86/xen/enlighten.c             |    8 +-
 drivers/xen/Kconfig                  |    6 +-
 drivers/xen/Makefile                 |    3 +-
 drivers/xen/mce_dom0.c               |  213 +++++++++++++++++
 include/xen/interface/xen-mca.h      |  428 ++++++++++++++++++++++++++++++++++
 6 files changed, 662 insertions(+), 4 deletions(-)
 create mode 100644 drivers/xen/mce_dom0.c
 create mode 100644 include/xen/interface/xen-mca.h

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3da450b..42a5601 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -46,6 +46,7 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/platform.h>
+#include <xen/interface/xen-mca.h>
 
 /*
  * The hypercall asms have to meet several constraints:
@@ -307,6 +308,13 @@ HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
 }
 
 static inline int
+HYPERVISOR_mca(struct xen_mc *mc_op)
+{
+	mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+	return _hypercall1(int, mca, mc_op);
+}
+
+static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
 	return _hypercall2(int, set_debugreg, reg, value);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 78970be..b6a9f3b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -217,13 +217,17 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
 static __init void xen_init_cpuid_mask(void)
 {
 	unsigned int ax, bx, cx, dx;
-
+#if !defined(CONFIG_XEN_MCE)
 	cpuid_leaf1_edx_mask =
 		~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
 		  (1 << X86_FEATURE_MCA)  |  /* disable MCA */
 		  (1 << X86_FEATURE_PAT)  |  /* disable PAT */
 		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
-
+#else
+	cpuid_leaf1_edx_mask =
+		~((1 << X86_FEATURE_PAT)  |  /* disable PAT */
+		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+#endif
 	cpuid_leaf81_edx_mask = ~(1 << (X86_FEATURE_GBPAGES % 32));
 
 	if (!xen_initial_domain())
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 3b1c421..c627fee 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -88,6 +88,10 @@ config XEN_SYS_HYPERVISOR
 config XEN_XENBUS_FRONTEND
        tristate
 
+config XEN_MCE
+       def_bool y
+       depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
+
 config XEN_S3
        def_bool y
-       depends on XEN_DOM0 && ACPI
\ No newline at end of file
+       depends on XEN_DOM0 && ACPI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 386c775..c893102 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_S3)			+= acpi.o
\ No newline at end of file
+obj-$(CONFIG_XEN_MCE)	+= mce_dom0.o
+obj-$(CONFIG_XEN_S3)			+= acpi.o
diff --git a/drivers/xen/mce_dom0.c b/drivers/xen/mce_dom0.c
new file mode 100644
index 0000000..eb5360d
--- /dev/null
+++ b/drivers/xen/mce_dom0.c
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * mce_dom0.c
+ * Add Machine Check event Logging support in DOM0
+ *
+ * Driver for receiving and logging machine check event
+ *
+ * Copyright (c) 2008, 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <xen/interface/xen.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/interface/vcpu.h>
+#include <asm/xen/hypercall.h>
+#include <asm/mce.h>
+
+static mc_info_t *g_mi;
+static mcinfo_logical_cpu_t *g_physinfo;
+static uint32_t ncpus;
+
+static int convert_log(struct mc_info *mi)
+{
+	struct mcinfo_common *mic = NULL;
+	struct mcinfo_global *mc_global;
+	struct mcinfo_bank *mc_bank;
+	struct mce m;
+	int i, found = 0;
+
+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
+	WARN_ON(!mic);
+
+	mce_setup(&m);
+	mc_global = (struct mcinfo_global *)mic;
+	m.mcgstatus = mc_global->mc_gstatus;
+	m.apicid = mc_global->mc_apicid;
+	for (i = 0; i < ncpus; i++) {
+		if (g_physinfo[i].mc_apicid == m.apicid) {
+			found = 1;
+			break;
+		}
+	}
+	WARN_ON(!found);
+
+	m.socketid = g_physinfo[i].mc_chipid;
+	m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
+	m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
+	do {
+		if (mic == NULL || mic->size == 0)
+			break;
+		if (mic->type == MC_TYPE_BANK) {
+			mc_bank = (struct mcinfo_bank *)mic;
+			m.misc = mc_bank->mc_misc;
+			m.status = mc_bank->mc_status;
+			m.addr = mc_bank->mc_addr;
+			m.tsc = mc_bank->mc_tsc;
+			m.bank = mc_bank->mc_bank;
+			/*log this record*/
+			mce_log(&m);
+		}
+		mic = x86_mcinfo_next(mic);
+	} while (1);
+
+	return 0;
+}
+
+/*dom0 mce virq handler, logging physical mce error info*/
+static irqreturn_t mce_dom0_interrupt(int irq, void *dev_id)
+{
+	xen_mc_t mc_op;
+	int result = 0;
+
+	mc_op.cmd = XEN_MC_fetch;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
+urgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_URGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+		goto nonurgent;
+	else {
+		result = convert_log(g_mi);
+		if (result)
+			goto end;
+		/* After fetching the error event log entry from DOM0,
+		 * we need to dec the refcnt and release the entry.
+		 * The entry is reserved and inc refcnt when filling
+		 * the error log entry.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+		goto urgent;
+	}
+nonurgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+		goto end;
+	else {
+		result = convert_log(g_mi);
+		if (result)
+			goto end;
+		/* After fetching the error event log entry from DOM0,
+		 * we need to dec the refcnt and release the entry. The
+		 * entry is reserved and inc refcnt when filling the
+		 * error log entry.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+		goto nonurgent;
+	}
+end:
+	return IRQ_HANDLED;
+}
+
+static int bind_virq_for_mce(void)
+{
+	int ret;
+	xen_mc_t mc_op;
+
+	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+		mce_dom0_interrupt, 0, "mce", NULL);
+
+	if (ret < 0) {
+		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+		return ret;
+	}
+
+	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
+
+	if (!g_mi)
+		return -ENOMEM;
+
+	/* Fetch physical CPU Numbers */
+	mc_op.cmd = XEN_MC_physcpuinfo;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPU numbers\n");
+		kfree(g_mi);
+		return ret;
+	}
+
+	/* Fetch each CPU Physical Info for later reference*/
+	ncpus = mc_op.u.mc_physcpuinfo.ncpus;
+	g_physinfo = kmalloc(sizeof(struct mcinfo_logical_cpu)*ncpus,
+					GFP_KERNEL);
+	if (!g_physinfo) {
+		kfree(g_mi);
+		return -ENOMEM;
+	}
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPUs info\n");
+		kfree(g_mi);
+		kfree(g_physinfo);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __init mcelog_init(void)
+{
+	/* Only DOM0 is responsible for MCE logging */
+	if (xen_initial_domain())
+		return bind_virq_for_mce();
+
+	return 0;
+}
+
+
+static void __exit mcelog_cleanup(void)
+{
+	kfree(g_mi);
+	kfree(g_physinfo);
+}
+module_init(mcelog_init);
+module_exit(mcelog_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
new file mode 100644
index 0000000..aba6b1f
--- /dev/null
+++ b/include/xen/interface/xen-mca.h
@@ -0,0 +1,428 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ *
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen free's and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; out interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
+#define XEN_MC_NONURGENT  0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent error log entry */
+#define XEN_MC_URGENT     0x0002
+/* IN: Dom0 acknowledges previosly-fetched error log entry */
+#define XEN_MC_ACK        0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall an other
+ *  (most likely) correctable error happened. The fetched data,
+ *  does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecure:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+#define MC_TYPE_RECOVERY        3
+
+struct mcinfo_common {
+	uint16_t type;      /* structure type */
+	uint16_t size;      /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+#define MC_FLAG_RECOVERABLE	(1 << 2)
+#define MC_FLAG_POLLED		(1 << 3)
+#define MC_FLAG_RESET		(1 << 4)
+#define MC_FLAG_CMCI		(1 << 5)
+#define MC_FLAG_MCE		(1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+	struct mcinfo_common common;
+
+	/* running domain at the time in error (most likely
+	 * the impacted one) */
+	uint16_t mc_domid;
+	uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+	uint32_t mc_socketid; /* physical socket of the physical core */
+	uint16_t mc_coreid; /* physical impacted core */
+	uint16_t mc_core_threadid; /* core thread of physical core */
+	uint32_t mc_apicid;
+	uint32_t mc_flags;
+	uint64_t mc_gstatus; /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+	struct mcinfo_common common;
+
+	uint16_t mc_bank; /* bank nr */
+	uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on dom0
+			* and if mc_addr is valid. Never valid on DomU. */
+	uint64_t mc_status; /* bank status */
+	uint64_t mc_addr;   /* bank address, only valid
+					 * if addr bit is set in mc_status */
+	uint64_t mc_misc;
+	uint64_t mc_ctrl2;
+	uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+	uint64_t reg;   /* MSR */
+	uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */
+struct mcinfo_extended {
+	struct mcinfo_common common;
+
+	/* You can fill up to five registers.
+	 * If you need more, then use this structure
+	 * multiple times. */
+
+	uint32_t mc_msrs; /* Number of msr with valid values. */
+	/*
+	 * Currently Intel extended MSR (32/64) include all gp registers
+	 * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
+	 * useful at present. So expand this array to 16/32 to leave room.
+	 */
+	struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some case */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different Recovery Action types, if the action is performed successfully,
+ * REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* Below interface used between XEN/DOM0 for passing XEN's recovery action
+ * information to DOM0.
+ * usage Senario: After offlining broken page, XEN might pass its page offline
+ * recovery action result to DOM0. DOM0 will save the information in
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easy broken page earlier when doing next reboot.
+*/
+struct page_offline_action {
+	/* Params for passing the offlined page number to DOM0 */
+	uint64_t mfn;
+	uint64_t status;
+};
+
+struct cpu_offline_action {
+	/* Params for passing the identity of the offlined CPU to DOM0 */
+	uint32_t mc_socketid;
+	uint16_t mc_coreid;
+	uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mcinfo_recovery {
+	struct mcinfo_common common;
+	uint16_t mc_bank; /* bank nr */
+	/* Recovery Action Flags defined above such as REC_ACTION_DONE */
+	uint8_t action_flags;
+	/* Recovery Action types defined above such as MC_ACTION_PAGE_OFFLINE */
+	uint8_t action_types;
+	/* In future if more than one recovery action permitted per error bank,
+	 * a mcinfo_recovery data array will be returned
+	 */
+	union {
+		struct page_offline_action page_retire;
+		struct cpu_offline_action cpu_offline;
+		uint8_t pad[MAX_UNION_SIZE];
+	} action_info;
+};
+
+
+#define MCINFO_HYPERCALLSIZE	1024
+#define MCINFO_MAXSIZE		768
+
+struct mc_info {
+	/* Number of mcinfo_* entries in mi_data */
+	uint32_t mi_nentries;
+	uint32_t _pad0;
+	uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
+};
+typedef struct mc_info mc_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(mc_info);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS	7	/* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX	0	/* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX	1	/* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM	2	/* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX	3	/* Linux-defined */
+#define MC_CAPS_STD_ECX	4	/* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA	5	/* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX	6	/* cpuid level 0x80000001 (%ecx) */
+
+struct mcinfo_logical_cpu {
+	uint32_t mc_cpunr;
+	uint32_t mc_chipid;
+	uint16_t mc_coreid;
+	uint16_t mc_threadid;
+	uint32_t mc_apicid;
+	uint32_t mc_clusterid;
+	uint32_t mc_ncores;
+	uint32_t mc_ncores_active;
+	uint32_t mc_nthreads;
+	int32_t mc_cpuid_level;
+	uint32_t mc_family;
+	uint32_t mc_vendor;
+	uint32_t mc_model;
+	uint32_t mc_step;
+	char mc_vendorid[16];
+	char mc_brandid[64];
+	uint32_t mc_cpu_caps[MC_NCAPS];
+	uint32_t mc_cache_size;
+	uint32_t mc_cache_alignment;
+	int32_t mc_nmsrvals;
+	struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+};
+typedef struct mcinfo_logical_cpu mcinfo_logical_cpu_t;
+DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
+
+
+/*
+ * OS's should use these instead of writing their own lookup function
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi)    \
+    ((_mi)->mi_nentries)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    ((struct mcinfo_common *)(_mi)->mi_data)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype:
+ *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+
+static inline void x86_mcinfo_lookup
+	(struct mcinfo_common **ret, struct mc_info *mi, uint16_t type)
+{
+	uint32_t found = 0, i;
+	struct mcinfo_common *mic;
+
+	*ret = NULL;
+	if (!mi)
+		return;
+	mic = x86_mcinfo_first(mi);
+
+	for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
+		if (mic->type == type) {
+			found = 1;
+			break;
+		}
+		mic = x86_mcinfo_next(mic);
+	}
+
+	*ret = found ? mic : NULL;
+}
+/* Usecase 1
+ * Register machine check trap callback handler
+ *    (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch            1
+struct xen_mc_fetch {
+    /* IN/OUT variables.
+	 * IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+	 * XEN_MC_ACK if ack'king an earlier fetch
+	 * OUT: XEN_MC_OK, XEN_MC_FETCHAILED,
+	 * XEN_MC_NODATA, XEN_MC_NOMATCH
+	*/
+	uint32_t flags;
+	uint32_t _pad0;
+	/* OUT: id for ack, IN: id we are ack'ing */
+	uint64_t fetch_id;
+
+	/* OUT variables. */
+	GUEST_HANDLE(mc_info) data;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain     2
+struct xen_mc_notifydomain {
+	/* IN variables. */
+	uint16_t mc_domid;/* The unprivileged domain to notify. */
+	uint16_t mc_vcpuid;/* The vcpu in mc_domid to notify.
+			* Usually echo'd value from the fetch hypercall. */
+
+	/* IN/OUT variables. */
+	uint32_t flags;
+
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
+
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+	/* IN/OUT */
+	uint32_t ncpus;
+	uint32_t _pad0;
+	/* OUT */
+	GUEST_HANDLE(mcinfo_logical_cpu) info;
+};
+
+#define XEN_MC_msrinject    4
+#define MC_MSRINJ_MAXMSRS       8
+struct xen_mc_msrinject {
+	/* IN */
+	uint32_t mcinj_cpunr;/* target processor id */
+	uint32_t mcinj_flags;/* see MC_MSRINJ_F_* below */
+	uint32_t mcinj_count;/* 0 .. count-1 in array are valid */
+	uint32_t _pad0;
+	struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE   0x1
+
+#define XEN_MC_mceinject    5
+struct xen_mc_mceinject {
+	unsigned int mceinj_cpunr;      /* target processor id */
+};
+
+struct xen_mc {
+	uint32_t cmd;
+	uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+	union {
+		struct xen_mc_fetch        mc_fetch;
+		struct xen_mc_notifydomain mc_notifydomain;
+		struct xen_mc_physcpuinfo  mc_physcpuinfo;
+		struct xen_mc_msrinject    mc_msrinject;
+		struct xen_mc_mceinject    mc_mceinject;
+	} u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
-- 
1.5.1


[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-08-07  2:32             ` Ke, Liping
@ 2009-08-07 18:07               ` Jeremy Fitzhardinge
  2009-08-10  3:08                 ` Ke, Liping
  0 siblings, 1 reply; 26+ messages in thread
From: Jeremy Fitzhardinge @ 2009-08-07 18:07 UTC (permalink / raw)
  To: Ke, Liping
  Cc: Kleen, Andi, Christoph Egger, xen-devel@lists.xensource.com,
	Keir Fraser, Jan Beulich

On 08/06/09 19:32, Ke, Liping wrote:
> Hi, Jeremy and all
>
> This is the new formated patch and I have tried linux patch format check tools on it.
>   

Thanks.  I have a couple of little comments:

    * when referring to Xen hg changesets, use the hex changeset ID
      rather than the number, since the number only makes sense locally
      (ie, your change 902 may not match anyone else's)
    * don't put "dom0" in the driver filename; drivers/xen/mce.c is
      enough.  Same with the various symbols and messages mentioning dom0.
    * don't use "#if define" if #ifdef would do; but try to avoid #ifdef
      anyway.  In particular, this *definitely* shouldn't be duplicating
      code between the #if and #else blocks.  At the very least, you
      should just need one #ifdef to re-add the MCE/MCA flags back into
      the mask.

      +#if !defined(CONFIG_XEN_MCE)
       	cpuid_leaf1_edx_mask =
       		~((1 << X86_FEATURE_MCE)  |  /* disable MCE */
       		  (1 << X86_FEATURE_MCA)  |  /* disable MCA */
       		  (1 << X86_FEATURE_PAT)  |  /* disable PAT */
       		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
      -
      +#else
      +	cpuid_leaf1_edx_mask =
      +		~((1 << X86_FEATURE_PAT)  |  /* disable PAT */
      +		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
      +#endif
          


Also, if you include the patch inline it makes it easier to review and
comment on.

Thanks,
    J

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-08-07 18:07               ` Jeremy Fitzhardinge
@ 2009-08-10  3:08                 ` Ke, Liping
  2009-08-13 20:55                   ` Jeremy Fitzhardinge
  2009-08-18  9:02                   ` Jan Beulich
  0 siblings, 2 replies; 26+ messages in thread
From: Ke, Liping @ 2009-08-10  3:08 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Kleen, Andi, Christoph Egger, xen-devel@lists.xensource.com,
	Keir Fraser, Jan Beulich

[-- Attachment #1: Type: text/plain, Size: 27766 bytes --]

Hi, Jeremy
Thanks for the detailed comments. I modified below:
1. change cs902 to cs: 75ebfa7fbdc
2. I renamed mce_dom0.c to mce.c and dom0 related message/variables.
3. use #ifdef CONFIG_XEN_MCE instead.
Thanks a lot!
Criping


>From e0815ca1fe7fbcc2f1de23608148e8d612974f2b Mon Sep 17 00:00:00 2001
From: Liping Ke <liping.ke@intel.com>
Date: Mon, 10 Aug 2009 11:04:21 +0800
Subject: [PATCH] Add MCA Logging support in pv-ops

This patch is backport from previous DOM0(2.6.18) cs: 75e5bfa7fbdc

When an MCE/CMCI error happens (or by polling), the related error
information will be sent to privileged pv-ops domain by XEN. This
patch will help to fetch the xen-logged information by hypercall
and then convert XEN-format log into Linux format MCELOG. It makes
using current available mcelog tools for native Linux possible.

With this patch, after mce/cmci error log information is sent to
pv-ops guest, Running mcelog tools in the guest, you will get same
detailed decoded mce information as in Native Linux.

Signed-off-by: Liping Ke <liping.ke@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
---
 arch/x86/include/asm/xen/hypercall.h |    8 +
 arch/x86/xen/enlighten.c             |    6 +
 drivers/xen/Kconfig                  |    6 +-
 drivers/xen/Makefile                 |    3 +-
 drivers/xen/mce.c                    |  213 +++++++++++++++++
 include/xen/interface/xen-mca.h      |  429 ++++++++++++++++++++++++++++++++++
 6 files changed, 663 insertions(+), 2 deletions(-)
 create mode 100644 drivers/xen/mce.c
 create mode 100644 include/xen/interface/xen-mca.h

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3da450b..42a5601 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -46,6 +46,7 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/platform.h>
+#include <xen/interface/xen-mca.h>

 /*
  * The hypercall asms have to meet several constraints:
@@ -307,6 +308,13 @@ HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
 }

 static inline int
+HYPERVISOR_mca(struct xen_mc *mc_op)
+{
+       mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+       return _hypercall1(int, mca, mc_op);
+}
+
+static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
        return _hypercall2(int, set_debugreg, reg, value);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 78970be..634543e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -224,6 +224,12 @@ static __init void xen_init_cpuid_mask(void)
                  (1 << X86_FEATURE_PAT)  |  /* disable PAT */
                  (1 << X86_FEATURE_ACC));   /* thermal monitoring */

+#ifdef CONFIG_XEN_MCE
+       cpuid_leaf1_edx_mask =
+               ~((1 << X86_FEATURE_PAT)  |  /* disable PAT */
+                 (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+#endif
+
        cpuid_leaf81_edx_mask = ~(1 << (X86_FEATURE_GBPAGES % 32));

        if (!xen_initial_domain())
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 3b1c421..c627fee 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -88,6 +88,10 @@ config XEN_SYS_HYPERVISOR
 config XEN_XENBUS_FRONTEND
        tristate

+config XEN_MCE
+       def_bool y
+       depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
+
 config XEN_S3
        def_bool y
-       depends on XEN_DOM0 && ACPI
\ No newline at end of file
+       depends on XEN_DOM0 && ACPI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 386c775..cd7b185 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND)        += blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
 obj-$(CONFIG_XENFS)                    += xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)       += sys-hypervisor.o
-obj-$(CONFIG_XEN_S3)                   += acpi.o
\ No newline at end of file
+obj-$(CONFIG_XEN_MCE)  += mce.o
+obj-$(CONFIG_XEN_S3)                   += acpi.o
diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c
new file mode 100644
index 0000000..ef838f2
--- /dev/null
+++ b/drivers/xen/mce.c
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * mce.c
+ * Add Machine Check event Logging support in DOM0
+ *
+ * Driver for receiving and logging machine check event
+ *
+ * Copyright (c) 2008, 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <xen/interface/xen.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/interface/vcpu.h>
+#include <asm/xen/hypercall.h>
+#include <asm/mce.h>
+
+static mc_info_t *g_mi;
+static mcinfo_logical_cpu_t *g_physinfo;
+static uint32_t ncpus;
+
+static int convert_log(struct mc_info *mi)
+{
+       struct mcinfo_common *mic = NULL;
+       struct mcinfo_global *mc_global;
+       struct mcinfo_bank *mc_bank;
+       struct mce m;
+       int i, found = 0;
+
+       x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
+       WARN_ON(!mic);
+
+       mce_setup(&m);
+       mc_global = (struct mcinfo_global *)mic;
+       m.mcgstatus = mc_global->mc_gstatus;
+       m.apicid = mc_global->mc_apicid;
+       for (i = 0; i < ncpus; i++) {
+               if (g_physinfo[i].mc_apicid == m.apicid) {
+                       found = 1;
+                       break;
+               }
+       }
+       WARN_ON(!found);
+
+       m.socketid = g_physinfo[i].mc_chipid;
+       m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
+       m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
+       x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
+       do {
+               if (mic == NULL || mic->size == 0)
+                       break;
+               if (mic->type == MC_TYPE_BANK) {
+                       mc_bank = (struct mcinfo_bank *)mic;
+                       m.misc = mc_bank->mc_misc;
+                       m.status = mc_bank->mc_status;
+                       m.addr = mc_bank->mc_addr;
+                       m.tsc = mc_bank->mc_tsc;
+                       m.bank = mc_bank->mc_bank;
+                       /*log this record*/
+                       mce_log(&m);
+               }
+               mic = x86_mcinfo_next(mic);
+       } while (1);
+
+       return 0;
+}
+
+/*pv_ops domain mce virq handler, logging physical mce error info*/
+static irqreturn_t mce_dom_interrupt(int irq, void *dev_id)
+{
+       xen_mc_t mc_op;
+       int result = 0;
+
+       mc_op.cmd = XEN_MC_fetch;
+       mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+       set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
+urgent:
+       mc_op.u.mc_fetch.flags = XEN_MC_URGENT;
+       result = HYPERVISOR_mca(&mc_op);
+       if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+                       mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+               goto nonurgent;
+       else {
+               result = convert_log(g_mi);
+               if (result)
+                       goto end;
+               /* After fetching the error event log entry from DOM0,
+                * we need to dec the refcnt and release the entry.
+                * The entry is reserved and inc refcnt when filling
+                * the error log entry.
+                */
+               mc_op.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK;
+               result = HYPERVISOR_mca(&mc_op);
+               goto urgent;
+       }
+nonurgent:
+       mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT;
+       result = HYPERVISOR_mca(&mc_op);
+       if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+                       mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+               goto end;
+       else {
+               result = convert_log(g_mi);
+               if (result)
+                       goto end;
+               /* After fetching the error event log entry from DOM0,
+                * we need to dec the refcnt and release the entry. The
+                * entry is reserved and inc refcnt when filling the
+                * error log entry.
+                */
+               mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK;
+               result = HYPERVISOR_mca(&mc_op);
+               goto nonurgent;
+       }
+end:
+       return IRQ_HANDLED;
+}
+
+static int bind_virq_for_mce(void)
+{
+       int ret;
+       xen_mc_t mc_op;
+
+       ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+               mce_dom_interrupt, 0, "mce", NULL);
+
+       if (ret < 0) {
+               printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+               return ret;
+       }
+
+       g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
+
+       if (!g_mi)
+               return -ENOMEM;
+
+       /* Fetch physical CPU Numbers */
+       mc_op.cmd = XEN_MC_physcpuinfo;
+       mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+       set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+       ret = HYPERVISOR_mca(&mc_op);
+       if (ret) {
+               printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPU numbers\n");
+               kfree(g_mi);
+               return ret;
+       }
+
+       /* Fetch each CPU Physical Info for later reference*/
+       ncpus = mc_op.u.mc_physcpuinfo.ncpus;
+       g_physinfo = kmalloc(sizeof(struct mcinfo_logical_cpu)*ncpus,
+                                       GFP_KERNEL);
+       if (!g_physinfo) {
+               kfree(g_mi);
+               return -ENOMEM;
+       }
+       set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+       ret = HYPERVISOR_mca(&mc_op);
+       if (ret) {
+               printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPUs info\n");
+               kfree(g_mi);
+               kfree(g_physinfo);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int __init mcelog_init(void)
+{
+       /* Only DOM0 is responsible for MCE logging */
+       if (xen_initial_domain())
+               return bind_virq_for_mce();
+
+       return 0;
+}
+
+
+static void __exit mcelog_cleanup(void)
+{
+       kfree(g_mi);
+       kfree(g_physinfo);
+}
+module_init(mcelog_init);
+module_exit(mcelog_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
new file mode 100644
index 0000000..f31fdab
--- /dev/null
+++ b/include/xen/interface/xen-mca.h
@@ -0,0 +1,429 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ *
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen free's and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; out interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
+#define XEN_MC_NONURGENT  0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent error log entry */
+#define XEN_MC_URGENT     0x0002
+/* IN: Dom0 acknowledges previosly-fetched error log entry */
+#define XEN_MC_ACK        0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall an other
+ *  (most likely) correctable error happened. The fetched data,
+ *  does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecure:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+#define MC_TYPE_RECOVERY        3
+
+struct mcinfo_common {
+       uint16_t type;      /* structure type */
+       uint16_t size;      /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+#define MC_FLAG_RECOVERABLE    (1 << 2)
+#define MC_FLAG_POLLED         (1 << 3)
+#define MC_FLAG_RESET          (1 << 4)
+#define MC_FLAG_CMCI           (1 << 5)
+#define MC_FLAG_MCE            (1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+       struct mcinfo_common common;
+
+       /* running domain at the time in error (most likely
+        * the impacted one) */
+       uint16_t mc_domid;
+       uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+       uint32_t mc_socketid; /* physical socket of the physical core */
+       uint16_t mc_coreid; /* physical impacted core */
+       uint16_t mc_core_threadid; /* core thread of physical core */
+       uint32_t mc_apicid;
+       uint32_t mc_flags;
+       uint64_t mc_gstatus; /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+       struct mcinfo_common common;
+
+       uint16_t mc_bank; /* bank nr */
+       uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on
+                       * privileged pv-ops dom and if mc_addr is valid.
+                       * Never valid on DomU. */
+       uint64_t mc_status; /* bank status */
+       uint64_t mc_addr;   /* bank address, only valid
+                                        * if addr bit is set in mc_status */
+       uint64_t mc_misc;
+       uint64_t mc_ctrl2;
+       uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+       uint64_t reg;   /* MSR */
+       uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */
+struct mcinfo_extended {
+       struct mcinfo_common common;
+
+       /* You can fill up to five registers.
+        * If you need more, then use this structure
+        * multiple times. */
+
+       uint32_t mc_msrs; /* Number of msr with valid values. */
+       /*
+        * Currently Intel extended MSR (32/64) include all gp registers
+        * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
+        * useful at present. So expand this array to 16/32 to leave room.
+        */
+       struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some case */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different Recovery Action types, if the action is performed successfully,
+ * REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* Below interface used between XEN/DOM0 for passing XEN's recovery action
+ * information to DOM0.
+ * usage Senario: After offlining broken page, XEN might pass its page offline
+ * recovery action result to DOM0. DOM0 will save the information in
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easy broken page earlier when doing next reboot.
+*/
+struct page_offline_action {
+       /* Params for passing the offlined page number to DOM0 */
+       uint64_t mfn;
+       uint64_t status;
+};
+
+struct cpu_offline_action {
+       /* Params for passing the identity of the offlined CPU to DOM0 */
+       uint32_t mc_socketid;
+       uint16_t mc_coreid;
+       uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mcinfo_recovery {
+       struct mcinfo_common common;
+       uint16_t mc_bank; /* bank nr */
+       /* Recovery Action Flags defined above such as REC_ACTION_DONE */
+       uint8_t action_flags;
+       /* Recovery Action types defined above such as MC_ACTION_PAGE_OFFLINE */
+       uint8_t action_types;
+       /* In future if more than one recovery action permitted per error bank,
+        * a mcinfo_recovery data array will be returned
+        */
+       union {
+               struct page_offline_action page_retire;
+               struct cpu_offline_action cpu_offline;
+               uint8_t pad[MAX_UNION_SIZE];
+       } action_info;
+};
+
+
+#define MCINFO_HYPERCALLSIZE   1024
+#define MCINFO_MAXSIZE         768
+
+struct mc_info {
+       /* Number of mcinfo_* entries in mi_data */
+       uint32_t mi_nentries;
+       uint32_t _pad0;
+       uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
+};
+typedef struct mc_info mc_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(mc_info);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS       7       /* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX        0       /* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX        1       /* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM     2       /* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX  3       /* Linux-defined */
+#define MC_CAPS_STD_ECX        4       /* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA    5       /* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX        6       /* cpuid level 0x80000001 (%ecx) */
+
+struct mcinfo_logical_cpu {
+       uint32_t mc_cpunr;
+       uint32_t mc_chipid;
+       uint16_t mc_coreid;
+       uint16_t mc_threadid;
+       uint32_t mc_apicid;
+       uint32_t mc_clusterid;
+       uint32_t mc_ncores;
+       uint32_t mc_ncores_active;
+       uint32_t mc_nthreads;
+       int32_t mc_cpuid_level;
+       uint32_t mc_family;
+       uint32_t mc_vendor;
+       uint32_t mc_model;
+       uint32_t mc_step;
+       char mc_vendorid[16];
+       char mc_brandid[64];
+       uint32_t mc_cpu_caps[MC_NCAPS];
+       uint32_t mc_cache_size;
+       uint32_t mc_cache_alignment;
+       int32_t mc_nmsrvals;
+       struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+};
+typedef struct mcinfo_logical_cpu mcinfo_logical_cpu_t;
+DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
+
+
+/*
+ * OS's should use these instead of writing their own lookup function
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi)    \
+    ((_mi)->mi_nentries)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    ((struct mcinfo_common *)(_mi)->mi_data)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype:
+ *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+
+static inline void x86_mcinfo_lookup
+       (struct mcinfo_common **ret, struct mc_info *mi, uint16_t type)
+{
+       uint32_t found = 0, i;
+       struct mcinfo_common *mic;
+
+       *ret = NULL;
+       if (!mi)
+               return;
+       mic = x86_mcinfo_first(mi);
+
+       for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
+               if (mic->type == type) {
+                       found = 1;
+                       break;
+               }
+               mic = x86_mcinfo_next(mic);
+       }
+
+       *ret = found ? mic : NULL;
+}
+/* Usecase 1
+ * Register machine check trap callback handler
+ *    (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch            1
+struct xen_mc_fetch {
+    /* IN/OUT variables.
+        * IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+        * XEN_MC_ACK if ack'king an earlier fetch
+        * OUT: XEN_MC_OK, XEN_MC_FETCHAILED,
+        * XEN_MC_NODATA, XEN_MC_NOMATCH
+       */
+       uint32_t flags;
+       uint32_t _pad0;
+       /* OUT: id for ack, IN: id we are ack'ing */
+       uint64_t fetch_id;
+
+       /* OUT variables. */
+       GUEST_HANDLE(mc_info) data;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain     2
+struct xen_mc_notifydomain {
+       /* IN variables. */
+       uint16_t mc_domid;/* The unprivileged domain to notify. */
+       uint16_t mc_vcpuid;/* The vcpu in mc_domid to notify.
+                       * Usually echo'd value from the fetch hypercall. */
+
+       /* IN/OUT variables. */
+       uint32_t flags;
+
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
+
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+       /* IN/OUT */
+       uint32_t ncpus;
+       uint32_t _pad0;
+       /* OUT */
+       GUEST_HANDLE(mcinfo_logical_cpu) info;
+};
+
+#define XEN_MC_msrinject    4
+#define MC_MSRINJ_MAXMSRS       8
+struct xen_mc_msrinject {
+       /* IN */
+       uint32_t mcinj_cpunr;/* target processor id */
+       uint32_t mcinj_flags;/* see MC_MSRINJ_F_* below */
+       uint32_t mcinj_count;/* 0 .. count-1 in array are valid */
+       uint32_t _pad0;
+       struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE   0x1
+
+#define XEN_MC_mceinject    5
+struct xen_mc_mceinject {
+       unsigned int mceinj_cpunr;      /* target processor id */
+};
+
+struct xen_mc {
+       uint32_t cmd;
+       uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+       union {
+               struct xen_mc_fetch        mc_fetch;
+               struct xen_mc_notifydomain mc_notifydomain;
+               struct xen_mc_physcpuinfo  mc_physcpuinfo;
+               struct xen_mc_msrinject    mc_msrinject;
+               struct xen_mc_mceinject    mc_mceinject;
+       } u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
--
1.5.1


[-- Attachment #2: 0001-Add-MCA-Logging-support-in-pv-ops.patch --]
[-- Type: application/octet-stream, Size: 24207 bytes --]

From e0815ca1fe7fbcc2f1de23608148e8d612974f2b Mon Sep 17 00:00:00 2001
From: Liping Ke <liping.ke@intel.com>
Date: Mon, 10 Aug 2009 11:04:21 +0800
Subject: [PATCH] Add MCA Logging support in pv-ops

This patch is backport from previous DOM0(2.6.18) cs: 75e5bfa7fbdc

When an MCE/CMCI error happens (or by polling), the related error
information will be sent to privileged pv-ops domain by XEN. This
patch will help to fetch the xen-logged information by hypercall
and then convert XEN-format log into Linux format MCELOG. It makes
using current available mcelog tools for native Linux possible.

With this patch, after mce/cmci error log information is sent to
pv-ops guest, Running mcelog tools in the guest, you will get same
detailed decoded mce information as in Native Linux.

Signed-off-by: Liping Ke <liping.ke@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
---
 arch/x86/include/asm/xen/hypercall.h |    8 +
 arch/x86/xen/enlighten.c             |    6 +
 drivers/xen/Kconfig                  |    6 +-
 drivers/xen/Makefile                 |    3 +-
 drivers/xen/mce.c                    |  213 +++++++++++++++++
 include/xen/interface/xen-mca.h      |  429 ++++++++++++++++++++++++++++++++++
 6 files changed, 663 insertions(+), 2 deletions(-)
 create mode 100644 drivers/xen/mce.c
 create mode 100644 include/xen/interface/xen-mca.h

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3da450b..42a5601 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -46,6 +46,7 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/platform.h>
+#include <xen/interface/xen-mca.h>
 
 /*
  * The hypercall asms have to meet several constraints:
@@ -307,6 +308,13 @@ HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
 }
 
 static inline int
+HYPERVISOR_mca(struct xen_mc *mc_op)
+{
+	mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+	return _hypercall1(int, mca, mc_op);
+}
+
+static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
 	return _hypercall2(int, set_debugreg, reg, value);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 78970be..634543e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -224,6 +224,12 @@ static __init void xen_init_cpuid_mask(void)
 		  (1 << X86_FEATURE_PAT)  |  /* disable PAT */
 		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 
+#ifdef CONFIG_XEN_MCE
+	cpuid_leaf1_edx_mask =
+		~((1 << X86_FEATURE_PAT)  |  /* disable PAT */
+		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+#endif
+
 	cpuid_leaf81_edx_mask = ~(1 << (X86_FEATURE_GBPAGES % 32));
 
 	if (!xen_initial_domain())
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 3b1c421..c627fee 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -88,6 +88,10 @@ config XEN_SYS_HYPERVISOR
 config XEN_XENBUS_FRONTEND
        tristate
 
+config XEN_MCE
+       def_bool y
+       depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
+
 config XEN_S3
        def_bool y
-       depends on XEN_DOM0 && ACPI
\ No newline at end of file
+       depends on XEN_DOM0 && ACPI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 386c775..cd7b185 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_S3)			+= acpi.o
\ No newline at end of file
+obj-$(CONFIG_XEN_MCE)	+= mce.o
+obj-$(CONFIG_XEN_S3)			+= acpi.o
diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c
new file mode 100644
index 0000000..ef838f2
--- /dev/null
+++ b/drivers/xen/mce.c
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * mce.c
+ * Add Machine Check event Logging support in DOM0
+ *
+ * Driver for receiving and logging machine check event
+ *
+ * Copyright (c) 2008, 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <xen/interface/xen.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/interface/vcpu.h>
+#include <asm/xen/hypercall.h>
+#include <asm/mce.h>
+
+static mc_info_t *g_mi;
+static mcinfo_logical_cpu_t *g_physinfo;
+static uint32_t ncpus;
+
+static int convert_log(struct mc_info *mi)
+{
+	struct mcinfo_common *mic = NULL;
+	struct mcinfo_global *mc_global;
+	struct mcinfo_bank *mc_bank;
+	struct mce m;
+	int i, found = 0;
+
+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
+	WARN_ON(!mic);
+
+	mce_setup(&m);
+	mc_global = (struct mcinfo_global *)mic;
+	m.mcgstatus = mc_global->mc_gstatus;
+	m.apicid = mc_global->mc_apicid;
+	for (i = 0; i < ncpus; i++) {
+		if (g_physinfo[i].mc_apicid == m.apicid) {
+			found = 1;
+			break;
+		}
+	}
+	WARN_ON(!found);
+
+	m.socketid = g_physinfo[i].mc_chipid;
+	m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
+	m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
+	do {
+		if (mic == NULL || mic->size == 0)
+			break;
+		if (mic->type == MC_TYPE_BANK) {
+			mc_bank = (struct mcinfo_bank *)mic;
+			m.misc = mc_bank->mc_misc;
+			m.status = mc_bank->mc_status;
+			m.addr = mc_bank->mc_addr;
+			m.tsc = mc_bank->mc_tsc;
+			m.bank = mc_bank->mc_bank;
+			/*log this record*/
+			mce_log(&m);
+		}
+		mic = x86_mcinfo_next(mic);
+	} while (1);
+
+	return 0;
+}
+
+/*pv_ops domain mce virq handler, logging physical mce error info*/
+static irqreturn_t mce_dom_interrupt(int irq, void *dev_id)
+{
+	xen_mc_t mc_op;
+	int result = 0;
+
+	mc_op.cmd = XEN_MC_fetch;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
+urgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_URGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+		goto nonurgent;
+	else {
+		result = convert_log(g_mi);
+		if (result)
+			goto end;
+		/* After fetching the error event log entry from DOM0,
+		 * we need to dec the refcnt and release the entry.
+		 * The entry is reserved and inc refcnt when filling
+		 * the error log entry.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+		goto urgent;
+	}
+nonurgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+		goto end;
+	else {
+		result = convert_log(g_mi);
+		if (result)
+			goto end;
+		/* After fetching the error event log entry from DOM0,
+		 * we need to dec the refcnt and release the entry. The
+		 * entry is reserved and inc refcnt when filling the
+		 * error log entry.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+		goto nonurgent;
+	}
+end:
+	return IRQ_HANDLED;
+}
+
+static int bind_virq_for_mce(void)
+{
+	int ret;
+	xen_mc_t mc_op;
+
+	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+		mce_dom_interrupt, 0, "mce", NULL);
+
+	if (ret < 0) {
+		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+		return ret;
+	}
+
+	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
+
+	if (!g_mi)
+		return -ENOMEM;
+
+	/* Fetch physical CPU Numbers */
+	mc_op.cmd = XEN_MC_physcpuinfo;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPU numbers\n");
+		kfree(g_mi);
+		return ret;
+	}
+
+	/* Fetch each CPU Physical Info for later reference*/
+	ncpus = mc_op.u.mc_physcpuinfo.ncpus;
+	g_physinfo = kmalloc(sizeof(struct mcinfo_logical_cpu)*ncpus,
+					GFP_KERNEL);
+	if (!g_physinfo) {
+		kfree(g_mi);
+		return -ENOMEM;
+	}
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPUs info\n");
+		kfree(g_mi);
+		kfree(g_physinfo);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __init mcelog_init(void)
+{
+	/* Only DOM0 is responsible for MCE logging */
+	if (xen_initial_domain())
+		return bind_virq_for_mce();
+
+	return 0;
+}
+
+
+static void __exit mcelog_cleanup(void)
+{
+	kfree(g_mi);
+	kfree(g_physinfo);
+}
+module_init(mcelog_init);
+module_exit(mcelog_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
new file mode 100644
index 0000000..f31fdab
--- /dev/null
+++ b/include/xen/interface/xen-mca.h
@@ -0,0 +1,429 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ *
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen free's and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; out interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
+#define XEN_MC_NONURGENT  0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent error log entry */
+#define XEN_MC_URGENT     0x0002
+/* IN: Dom0 acknowledges previosly-fetched error log entry */
+#define XEN_MC_ACK        0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall an other
+ *  (most likely) correctable error happened. The fetched data,
+ *  does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecure:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+#define MC_TYPE_RECOVERY        3
+
+struct mcinfo_common {
+	uint16_t type;      /* structure type */
+	uint16_t size;      /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+#define MC_FLAG_RECOVERABLE	(1 << 2)
+#define MC_FLAG_POLLED		(1 << 3)
+#define MC_FLAG_RESET		(1 << 4)
+#define MC_FLAG_CMCI		(1 << 5)
+#define MC_FLAG_MCE		(1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+	struct mcinfo_common common;
+
+	/* running domain at the time in error (most likely
+	 * the impacted one) */
+	uint16_t mc_domid;
+	uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+	uint32_t mc_socketid; /* physical socket of the physical core */
+	uint16_t mc_coreid; /* physical impacted core */
+	uint16_t mc_core_threadid; /* core thread of physical core */
+	uint32_t mc_apicid;
+	uint32_t mc_flags;
+	uint64_t mc_gstatus; /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+	struct mcinfo_common common;
+
+	uint16_t mc_bank; /* bank nr */
+	uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on
+			* privileged pv-ops dom and if mc_addr is valid.
+			* Never valid on DomU. */
+	uint64_t mc_status; /* bank status */
+	uint64_t mc_addr;   /* bank address, only valid
+					 * if addr bit is set in mc_status */
+	uint64_t mc_misc;
+	uint64_t mc_ctrl2;
+	uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+	uint64_t reg;   /* MSR */
+	uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */
+struct mcinfo_extended {
+	struct mcinfo_common common;
+
+	/* You can fill up to five registers.
+	 * If you need more, then use this structure
+	 * multiple times. */
+
+	uint32_t mc_msrs; /* Number of msr with valid values. */
+	/*
+	 * Currently Intel extended MSR (32/64) include all gp registers
+	 * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
+	 * useful at present. So expand this array to 16/32 to leave room.
+	 */
+	struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some case */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different Recovery Action types, if the action is performed successfully,
+ * REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* Below interface used between XEN/DOM0 for passing XEN's recovery action
+ * information to DOM0.
+ * usage Senario: After offlining broken page, XEN might pass its page offline
+ * recovery action result to DOM0. DOM0 will save the information in
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easy broken page earlier when doing next reboot.
+*/
+struct page_offline_action {
+	/* Params for passing the offlined page number to DOM0 */
+	uint64_t mfn;
+	uint64_t status;
+};
+
+struct cpu_offline_action {
+	/* Params for passing the identity of the offlined CPU to DOM0 */
+	uint32_t mc_socketid;
+	uint16_t mc_coreid;
+	uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mcinfo_recovery {
+	struct mcinfo_common common;
+	uint16_t mc_bank; /* bank nr */
+	/* Recovery Action Flags defined above such as REC_ACTION_DONE */
+	uint8_t action_flags;
+	/* Recovery Action types defined above such as MC_ACTION_PAGE_OFFLINE */
+	uint8_t action_types;
+	/* In future if more than one recovery action permitted per error bank,
+	 * a mcinfo_recovery data array will be returned
+	 */
+	union {
+		struct page_offline_action page_retire;
+		struct cpu_offline_action cpu_offline;
+		uint8_t pad[MAX_UNION_SIZE];
+	} action_info;
+};
+
+
+#define MCINFO_HYPERCALLSIZE	1024
+#define MCINFO_MAXSIZE		768
+
+struct mc_info {
+	/* Number of mcinfo_* entries in mi_data */
+	uint32_t mi_nentries;
+	uint32_t _pad0;
+	uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
+};
+typedef struct mc_info mc_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(mc_info);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS	7	/* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX	0	/* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX	1	/* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM	2	/* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX	3	/* Linux-defined */
+#define MC_CAPS_STD_ECX	4	/* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA	5	/* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX	6	/* cpuid level 0x80000001 (%ecx) */
+
+struct mcinfo_logical_cpu {
+	uint32_t mc_cpunr;
+	uint32_t mc_chipid;
+	uint16_t mc_coreid;
+	uint16_t mc_threadid;
+	uint32_t mc_apicid;
+	uint32_t mc_clusterid;
+	uint32_t mc_ncores;
+	uint32_t mc_ncores_active;
+	uint32_t mc_nthreads;
+	int32_t mc_cpuid_level;
+	uint32_t mc_family;
+	uint32_t mc_vendor;
+	uint32_t mc_model;
+	uint32_t mc_step;
+	char mc_vendorid[16];
+	char mc_brandid[64];
+	uint32_t mc_cpu_caps[MC_NCAPS];
+	uint32_t mc_cache_size;
+	uint32_t mc_cache_alignment;
+	int32_t mc_nmsrvals;
+	struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+};
+typedef struct mcinfo_logical_cpu mcinfo_logical_cpu_t;
+DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
+
+
+/*
+ * OS's should use these instead of writing their own lookup function
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi)    \
+    ((_mi)->mi_nentries)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    ((struct mcinfo_common *)(_mi)->mi_data)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype:
+ *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+
+static inline void x86_mcinfo_lookup
+	(struct mcinfo_common **ret, struct mc_info *mi, uint16_t type)
+{
+	uint32_t found = 0, i;
+	struct mcinfo_common *mic;
+
+	*ret = NULL;
+	if (!mi)
+		return;
+	mic = x86_mcinfo_first(mi);
+
+	for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
+		if (mic->type == type) {
+			found = 1;
+			break;
+		}
+		mic = x86_mcinfo_next(mic);
+	}
+
+	*ret = found ? mic : NULL;
+}
+/* Usecase 1
+ * Register machine check trap callback handler
+ *    (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch            1
+struct xen_mc_fetch {
+    /* IN/OUT variables.
+	 * IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+	 * XEN_MC_ACK if ack'king an earlier fetch
+	 * OUT: XEN_MC_OK, XEN_MC_FETCHAILED,
+	 * XEN_MC_NODATA, XEN_MC_NOMATCH
+	*/
+	uint32_t flags;
+	uint32_t _pad0;
+	/* OUT: id for ack, IN: id we are ack'ing */
+	uint64_t fetch_id;
+
+	/* OUT variables. */
+	GUEST_HANDLE(mc_info) data;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain     2
+struct xen_mc_notifydomain {
+	/* IN variables. */
+	uint16_t mc_domid;/* The unprivileged domain to notify. */
+	uint16_t mc_vcpuid;/* The vcpu in mc_domid to notify.
+			* Usually echo'd value from the fetch hypercall. */
+
+	/* IN/OUT variables. */
+	uint32_t flags;
+
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
+
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+	/* IN/OUT */
+	uint32_t ncpus;
+	uint32_t _pad0;
+	/* OUT */
+	GUEST_HANDLE(mcinfo_logical_cpu) info;
+};
+
+#define XEN_MC_msrinject    4
+#define MC_MSRINJ_MAXMSRS       8
+struct xen_mc_msrinject {
+	/* IN */
+	uint32_t mcinj_cpunr;/* target processor id */
+	uint32_t mcinj_flags;/* see MC_MSRINJ_F_* below */
+	uint32_t mcinj_count;/* 0 .. count-1 in array are valid */
+	uint32_t _pad0;
+	struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE   0x1
+
+#define XEN_MC_mceinject    5
+struct xen_mc_mceinject {
+	unsigned int mceinj_cpunr;      /* target processor id */
+};
+
+struct xen_mc {
+	uint32_t cmd;
+	uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+	union {
+		struct xen_mc_fetch        mc_fetch;
+		struct xen_mc_notifydomain mc_notifydomain;
+		struct xen_mc_physcpuinfo  mc_physcpuinfo;
+		struct xen_mc_msrinject    mc_msrinject;
+		struct xen_mc_mceinject    mc_mceinject;
+	} u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
-- 
1.5.1


[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-08-10  3:08                 ` Ke, Liping
@ 2009-08-13 20:55                   ` Jeremy Fitzhardinge
  2009-08-18  9:02                   ` Jan Beulich
  1 sibling, 0 replies; 26+ messages in thread
From: Jeremy Fitzhardinge @ 2009-08-13 20:55 UTC (permalink / raw)
  To: Ke, Liping
  Cc: Kleen, Andi, Christoph Egger, xen-devel@lists.xensource.com,
	Keir Fraser, Jan Beulich

On 08/09/09 20:08, Ke, Liping wrote:
> Hi, Jeremy
> Thanks for the detailed comments. I modified below:
> 1. change cs902 to cs: 75ebfa7fbdc
> 2. I renamed mce_dom0.c to mce.c and dom0 related message/variables.
> 3. use #ifdef CONFIG_XEN_MCE instead.
> Thanks a lot!
>   

I pushed this to rebase/dom0/mce, but I haven't merged it with master
yet.  I changed the way the MCE/MCA feature bits get masked: they're
masked in non-dom0 domains like ACPI and APIC, but left as-is otherwise,
since the kernel shouldn't care about them unless CONFIG_X86_MCE_INTEL
is enabled anyway.

    J

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-08-10  3:08                 ` Ke, Liping
  2009-08-13 20:55                   ` Jeremy Fitzhardinge
@ 2009-08-18  9:02                   ` Jan Beulich
  2009-08-18  9:25                     ` Ke, Liping
  1 sibling, 1 reply; 26+ messages in thread
From: Jan Beulich @ 2009-08-18  9:02 UTC (permalink / raw)
  To: Liping Ke
  Cc: Andi Kleen, Christoph Egger, xen-devel@lists.xensource.com,
	Keir Fraser, Jeremy Fitzhardinge

>>> "Ke, Liping" <liping.ke@intel.com> 10.08.09 05:08 >>>
>From e0815ca1fe7fbcc2f1de23608148e8d612974f2b Mon Sep 17 00:00:00 2001
>From: Liping Ke <liping.ke@intel.com>
>Date: Mon, 10 Aug 2009 11:04:21 +0800
>Subject: [PATCH] Add MCA Logging support in pv-ops
>
>This patch is backport from previous DOM0(2.6.18) cs: 75e5bfa7fbdc
>
>When an MCE/CMCI error happens (or by polling), the related error
>information will be sent to privileged pv-ops domain by XEN. This
>patch will help to fetch the xen-logged information by hypercall
>and then convert XEN-format log into Linux format MCELOG. It makes
>using current available mcelog tools for native Linux possible.
>
>With this patch, after mce/cmci error log information is sent to
>pv-ops guest, Running mcelog tools in the guest, you will get same
>detailed decoded mce information as in Native Linux.
>
>Signed-off-by: Liping Ke <liping.ke@intel.com>
>Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>

The order of operations in bind_virq_for_mce() needs to change: You must
not register the vIRQ handler if any of the memory allocations failed,
otherwise the interrupt handler will de-reference NULL.

Jan

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-08-18  9:02                   ` Jan Beulich
@ 2009-08-18  9:25                     ` Ke, Liping
  2009-08-18 17:51                       ` Jeremy Fitzhardinge
  0 siblings, 1 reply; 26+ messages in thread
From: Ke, Liping @ 2009-08-18  9:25 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Kleen, Andi, Christoph Egger, xen-devel@lists.xensource.com,
	Keir Fraser, Jeremy Fitzhardinge

[-- Attachment #1: Type: text/plain, Size: 28959 bytes --]

Hi, Jan
Yes, the added physinfo fetch op should be placed before
registering the vIRQ handler. Thanks for pointing out.
Below is the updated patch.
Regards,
Criping

Jan Beulich wrote:
>>>> "Ke, Liping" <liping.ke@intel.com> 10.08.09 05:08 >>>
>> From e0815ca1fe7fbcc2f1de23608148e8d612974f2b Mon Sep 17 00:00:00
>> 2001 From: Liping Ke <liping.ke@intel.com>
>> Date: Mon, 10 Aug 2009 11:04:21 +0800
>> Subject: [PATCH] Add MCA Logging support in pv-ops
>>
>> This patch is backport from previous DOM0(2.6.18) cs: 75e5bfa7fbdc
>>
>> When an MCE/CMCI error happens (or by polling), the related error
>> information will be sent to privileged pv-ops domain by XEN. This
>> patch will help to fetch the xen-logged information by hypercall
>> and then convert XEN-format log into Linux format MCELOG. It makes
>> using current available mcelog tools for native Linux possible.
>>
>> With this patch, after mce/cmci error log information is sent to
>> pv-ops guest, Running mcelog tools in the guest, you will get same
>> detailed decoded mce information as in Native Linux.
>>
>> Signed-off-by: Liping Ke <liping.ke@intel.com>
>> Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
>
> The order of operations in bind_virq_for_mce() needs to change: You
> must not register the vIRQ handler if any of the memory allocations
> failed, otherwise the interrupt handler will de-reference NULL.
>
> Jan

>From c2a612682cd1817c4852492ec077a547cadffbf7 Mon Sep 17 00:00:00 2001
From: Liping Ke <liping.ke@intel.com>
Date: Tue, 18 Aug 2009 17:11:36 +0800
Subject: [PATCH] Add MCA Logging support in pv-ops

This patch is backport from previous DOM0(2.6.18) cs: 75e5bfa7fbdc

When an MCE/CMCI error happens (or by polling), the related error
information will be sent to privileged pv-ops domain by XEN. This
patch will help to fetch the xen-logged information by hypercall
and then convert XEN-format log into Linux format MCELOG. It makes
using current available mcelog tools for native Linux possible.

With this patch, after mce/cmci error log information is sent to
pv-ops guest, Running mcelog tools in the guest, you will get same
detailed decoded mce information as in Native Linux.

Signed-off-by: Liping Ke <liping.ke@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
---
 arch/x86/include/asm/xen/hypercall.h |    8 +
 arch/x86/xen/enlighten.c             |    6 +
 drivers/xen/Kconfig                  |    6 +-
 drivers/xen/Makefile                 |    3 +-
 drivers/xen/mce.c                    |  213 +++++++++++++++++
 include/xen/interface/xen-mca.h      |  429 ++++++++++++++++++++++++++++++++++
 6 files changed, 663 insertions(+), 2 deletions(-)
 create mode 100644 drivers/xen/mce.c
 create mode 100644 include/xen/interface/xen-mca.h

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3da450b..42a5601 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -46,6 +46,7 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/platform.h>
+#include <xen/interface/xen-mca.h>

 /*
  * The hypercall asms have to meet several constraints:
@@ -307,6 +308,13 @@ HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
 }

 static inline int
+HYPERVISOR_mca(struct xen_mc *mc_op)
+{
+       mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+       return _hypercall1(int, mca, mc_op);
+}
+
+static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
        return _hypercall2(int, set_debugreg, reg, value);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 78970be..634543e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -224,6 +224,12 @@ static __init void xen_init_cpuid_mask(void)
                  (1 << X86_FEATURE_PAT)  |  /* disable PAT */
                  (1 << X86_FEATURE_ACC));   /* thermal monitoring */

+#ifdef CONFIG_XEN_MCE
+       cpuid_leaf1_edx_mask =
+               ~((1 << X86_FEATURE_PAT)  |  /* disable PAT */
+                 (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+#endif
+
        cpuid_leaf81_edx_mask = ~(1 << (X86_FEATURE_GBPAGES % 32));

        if (!xen_initial_domain())
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 3b1c421..c627fee 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -88,6 +88,10 @@ config XEN_SYS_HYPERVISOR
 config XEN_XENBUS_FRONTEND
        tristate

+config XEN_MCE
+       def_bool y
+       depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
+
 config XEN_S3
        def_bool y
-       depends on XEN_DOM0 && ACPI
\ No newline at end of file
+       depends on XEN_DOM0 && ACPI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 386c775..cd7b185 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND)        += blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
 obj-$(CONFIG_XENFS)                    += xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)       += sys-hypervisor.o
-obj-$(CONFIG_XEN_S3)                   += acpi.o
\ No newline at end of file
+obj-$(CONFIG_XEN_MCE)  += mce.o
+obj-$(CONFIG_XEN_S3)                   += acpi.o
diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c
new file mode 100644
index 0000000..b354dc8
--- /dev/null
+++ b/drivers/xen/mce.c
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * mce.c
+ * Add Machine Check event Logging support in DOM0
+ *
+ * Driver for receiving and logging machine check event
+ *
+ * Copyright (c) 2008, 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <xen/interface/xen.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/interface/vcpu.h>
+#include <asm/xen/hypercall.h>
+#include <asm/mce.h>
+
+static mc_info_t *g_mi;
+static mcinfo_logical_cpu_t *g_physinfo;
+static uint32_t ncpus;
+
+static int convert_log(struct mc_info *mi)
+{
+       struct mcinfo_common *mic = NULL;
+       struct mcinfo_global *mc_global;
+       struct mcinfo_bank *mc_bank;
+       struct mce m;
+       int i, found = 0;
+
+       x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
+       WARN_ON(!mic);
+
+       mce_setup(&m);
+       mc_global = (struct mcinfo_global *)mic;
+       m.mcgstatus = mc_global->mc_gstatus;
+       m.apicid = mc_global->mc_apicid;
+       for (i = 0; i < ncpus; i++) {
+               if (g_physinfo[i].mc_apicid == m.apicid) {
+                       found = 1;
+                       break;
+               }
+       }
+       WARN_ON(!found);
+
+       m.socketid = g_physinfo[i].mc_chipid;
+       m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
+       m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
+       x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
+       do {
+               if (mic == NULL || mic->size == 0)
+                       break;
+               if (mic->type == MC_TYPE_BANK) {
+                       mc_bank = (struct mcinfo_bank *)mic;
+                       m.misc = mc_bank->mc_misc;
+                       m.status = mc_bank->mc_status;
+                       m.addr = mc_bank->mc_addr;
+                       m.tsc = mc_bank->mc_tsc;
+                       m.bank = mc_bank->mc_bank;
+                       /*log this record*/
+                       mce_log(&m);
+               }
+               mic = x86_mcinfo_next(mic);
+       } while (1);
+
+       return 0;
+}
+
+/*pv_ops domain mce virq handler, logging physical mce error info*/
+static irqreturn_t mce_dom_interrupt(int irq, void *dev_id)
+{
+       xen_mc_t mc_op;
+       int result = 0;
+
+       mc_op.cmd = XEN_MC_fetch;
+       mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+       set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
+urgent:
+       mc_op.u.mc_fetch.flags = XEN_MC_URGENT;
+       result = HYPERVISOR_mca(&mc_op);
+       if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+                       mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+               goto nonurgent;
+       else {
+               result = convert_log(g_mi);
+               if (result)
+                       goto end;
+               /* After fetching the error event log entry from DOM0,
+                * we need to dec the refcnt and release the entry.
+                * The entry is reserved and inc refcnt when filling
+                * the error log entry.
+                */
+               mc_op.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK;
+               result = HYPERVISOR_mca(&mc_op);
+               goto urgent;
+       }
+nonurgent:
+       mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT;
+       result = HYPERVISOR_mca(&mc_op);
+       if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+                       mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+               goto end;
+       else {
+               result = convert_log(g_mi);
+               if (result)
+                       goto end;
+               /* After fetching the error event log entry from DOM0,
+                * we need to dec the refcnt and release the entry. The
+                * entry is reserved and inc refcnt when filling the
+                * error log entry.
+                */
+               mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK;
+               result = HYPERVISOR_mca(&mc_op);
+               goto nonurgent;
+       }
+end:
+       return IRQ_HANDLED;
+}
+
+static int bind_virq_for_mce(void)
+{
+       int ret;
+       xen_mc_t mc_op;
+
+       g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
+
+       if (!g_mi)
+               return -ENOMEM;
+
+       /* Fetch physical CPU Numbers */
+       mc_op.cmd = XEN_MC_physcpuinfo;
+       mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+       set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+       ret = HYPERVISOR_mca(&mc_op);
+       if (ret) {
+               printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPU numbers\n");
+               kfree(g_mi);
+               return ret;
+       }
+
+       /* Fetch each CPU Physical Info for later reference*/
+       ncpus = mc_op.u.mc_physcpuinfo.ncpus;
+       g_physinfo = kmalloc(sizeof(struct mcinfo_logical_cpu)*ncpus,
+                                       GFP_KERNEL);
+       if (!g_physinfo) {
+               kfree(g_mi);
+               return -ENOMEM;
+       }
+       set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+       ret = HYPERVISOR_mca(&mc_op);
+       if (ret) {
+               printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPUs info\n");
+               kfree(g_mi);
+               kfree(g_physinfo);
+               return ret;
+       }
+
+       ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+               mce_dom_interrupt, 0, "mce", NULL);
+
+       if (ret < 0) {
+               printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+static int __init mcelog_init(void)
+{
+       /* Only DOM0 is responsible for MCE logging */
+       if (xen_initial_domain())
+               return bind_virq_for_mce();
+
+       return 0;
+}
+
+
+static void __exit mcelog_cleanup(void)
+{
+       kfree(g_mi);
+       kfree(g_physinfo);
+}
+module_init(mcelog_init);
+module_exit(mcelog_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
new file mode 100644
index 0000000..f31fdab
--- /dev/null
+++ b/include/xen/interface/xen-mca.h
@@ -0,0 +1,429 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ *
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen free's and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; out interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
+#define XEN_MC_NONURGENT  0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent error log entry */
+#define XEN_MC_URGENT     0x0002
+/* IN: Dom0 acknowledges previosly-fetched error log entry */
+#define XEN_MC_ACK        0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall an other
+ *  (most likely) correctable error happened. The fetched data,
+ *  does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecure:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+#define MC_TYPE_RECOVERY        3
+
+struct mcinfo_common {
+       uint16_t type;      /* structure type */
+       uint16_t size;      /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+#define MC_FLAG_RECOVERABLE    (1 << 2)
+#define MC_FLAG_POLLED         (1 << 3)
+#define MC_FLAG_RESET          (1 << 4)
+#define MC_FLAG_CMCI           (1 << 5)
+#define MC_FLAG_MCE            (1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+       struct mcinfo_common common;
+
+       /* running domain at the time in error (most likely
+        * the impacted one) */
+       uint16_t mc_domid;
+       uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+       uint32_t mc_socketid; /* physical socket of the physical core */
+       uint16_t mc_coreid; /* physical impacted core */
+       uint16_t mc_core_threadid; /* core thread of physical core */
+       uint32_t mc_apicid;
+       uint32_t mc_flags;
+       uint64_t mc_gstatus; /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+       struct mcinfo_common common;
+
+       uint16_t mc_bank; /* bank nr */
+       uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on
+                       * privileged pv-ops dom and if mc_addr is valid.
+                       * Never valid on DomU. */
+       uint64_t mc_status; /* bank status */
+       uint64_t mc_addr;   /* bank address, only valid
+                                        * if addr bit is set in mc_status */
+       uint64_t mc_misc;
+       uint64_t mc_ctrl2;
+       uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+       uint64_t reg;   /* MSR */
+       uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */
+struct mcinfo_extended {
+       struct mcinfo_common common;
+
+       /* You can fill up to five registers.
+        * If you need more, then use this structure
+        * multiple times. */
+
+       uint32_t mc_msrs; /* Number of msr with valid values. */
+       /*
+        * Currently Intel extended MSR (32/64) include all gp registers
+        * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
+        * useful at present. So expand this array to 16/32 to leave room.
+        */
+       struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some case */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different Recovery Action types, if the action is performed successfully,
+ * REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* Below interface used between XEN/DOM0 for passing XEN's recovery action
+ * information to DOM0.
+ * usage Senario: After offlining broken page, XEN might pass its page offline
+ * recovery action result to DOM0. DOM0 will save the information in
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easy broken page earlier when doing next reboot.
+*/
+struct page_offline_action {
+       /* Params for passing the offlined page number to DOM0 */
+       uint64_t mfn;
+       uint64_t status;
+};
+
+struct cpu_offline_action {
+       /* Params for passing the identity of the offlined CPU to DOM0 */
+       uint32_t mc_socketid;
+       uint16_t mc_coreid;
+       uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mcinfo_recovery {
+       struct mcinfo_common common;
+       uint16_t mc_bank; /* bank nr */
+       /* Recovery Action Flags defined above such as REC_ACTION_DONE */
+       uint8_t action_flags;
+       /* Recovery Action types defined above such as MC_ACTION_PAGE_OFFLINE */
+       uint8_t action_types;
+       /* In future if more than one recovery action permitted per error bank,
+        * a mcinfo_recovery data array will be returned
+        */
+       union {
+               struct page_offline_action page_retire;
+               struct cpu_offline_action cpu_offline;
+               uint8_t pad[MAX_UNION_SIZE];
+       } action_info;
+};
+
+
+#define MCINFO_HYPERCALLSIZE   1024
+#define MCINFO_MAXSIZE         768
+
+struct mc_info {
+       /* Number of mcinfo_* entries in mi_data */
+       uint32_t mi_nentries;
+       uint32_t _pad0;
+       uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
+};
+typedef struct mc_info mc_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(mc_info);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS       7       /* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX        0       /* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX        1       /* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM     2       /* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX  3       /* Linux-defined */
+#define MC_CAPS_STD_ECX        4       /* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA    5       /* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX        6       /* cpuid level 0x80000001 (%ecx) */
+
+struct mcinfo_logical_cpu {
+       uint32_t mc_cpunr;
+       uint32_t mc_chipid;
+       uint16_t mc_coreid;
+       uint16_t mc_threadid;
+       uint32_t mc_apicid;
+       uint32_t mc_clusterid;
+       uint32_t mc_ncores;
+       uint32_t mc_ncores_active;
+       uint32_t mc_nthreads;
+       int32_t mc_cpuid_level;
+       uint32_t mc_family;
+       uint32_t mc_vendor;
+       uint32_t mc_model;
+       uint32_t mc_step;
+       char mc_vendorid[16];
+       char mc_brandid[64];
+       uint32_t mc_cpu_caps[MC_NCAPS];
+       uint32_t mc_cache_size;
+       uint32_t mc_cache_alignment;
+       int32_t mc_nmsrvals;
+       struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+};
+typedef struct mcinfo_logical_cpu mcinfo_logical_cpu_t;
+DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
+
+
+/*
+ * OS's should use these instead of writing their own lookup function
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi)    \
+    ((_mi)->mi_nentries)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    ((struct mcinfo_common *)(_mi)->mi_data)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype:
+ *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+
+static inline void x86_mcinfo_lookup
+       (struct mcinfo_common **ret, struct mc_info *mi, uint16_t type)
+{
+       uint32_t found = 0, i;
+       struct mcinfo_common *mic;
+
+       *ret = NULL;
+       if (!mi)
+               return;
+       mic = x86_mcinfo_first(mi);
+
+       for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
+               if (mic->type == type) {
+                       found = 1;
+                       break;
+               }
+               mic = x86_mcinfo_next(mic);
+       }
+
+       *ret = found ? mic : NULL;
+}
+/* Usecase 1
+ * Register machine check trap callback handler
+ *    (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch            1
+struct xen_mc_fetch {
+    /* IN/OUT variables.
+        * IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+        * XEN_MC_ACK if ack'king an earlier fetch
+        * OUT: XEN_MC_OK, XEN_MC_FETCHAILED,
+        * XEN_MC_NODATA, XEN_MC_NOMATCH
+       */
+       uint32_t flags;
+       uint32_t _pad0;
+       /* OUT: id for ack, IN: id we are ack'ing */
+       uint64_t fetch_id;
+
+       /* OUT variables. */
+       GUEST_HANDLE(mc_info) data;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain     2
+struct xen_mc_notifydomain {
+       /* IN variables. */
+       uint16_t mc_domid;/* The unprivileged domain to notify. */
+       uint16_t mc_vcpuid;/* The vcpu in mc_domid to notify.
+                       * Usually echo'd value from the fetch hypercall. */
+
+       /* IN/OUT variables. */
+       uint32_t flags;
+
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
+
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+       /* IN/OUT */
+       uint32_t ncpus;
+       uint32_t _pad0;
+       /* OUT */
+       GUEST_HANDLE(mcinfo_logical_cpu) info;
+};
+
+#define XEN_MC_msrinject    4
+#define MC_MSRINJ_MAXMSRS       8
+struct xen_mc_msrinject {
+       /* IN */
+       uint32_t mcinj_cpunr;/* target processor id */
+       uint32_t mcinj_flags;/* see MC_MSRINJ_F_* below */
+       uint32_t mcinj_count;/* 0 .. count-1 in array are valid */
+       uint32_t _pad0;
+       struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE   0x1
+
+#define XEN_MC_mceinject    5
+struct xen_mc_mceinject {
+       unsigned int mceinj_cpunr;      /* target processor id */
+};
+
+struct xen_mc {
+       uint32_t cmd;
+       uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+       union {
+               struct xen_mc_fetch        mc_fetch;
+               struct xen_mc_notifydomain mc_notifydomain;
+               struct xen_mc_physcpuinfo  mc_physcpuinfo;
+               struct xen_mc_msrinject    mc_msrinject;
+               struct xen_mc_mceinject    mc_mceinject;
+       } u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
--
1.5.1

[-- Attachment #2: 0001-Add-MCA-Logging-support-in-pv-ops.patch --]
[-- Type: application/octet-stream, Size: 24207 bytes --]

From c2a612682cd1817c4852492ec077a547cadffbf7 Mon Sep 17 00:00:00 2001
From: Liping Ke <liping.ke@intel.com>
Date: Tue, 18 Aug 2009 17:11:36 +0800
Subject: [PATCH] Add MCA Logging support in pv-ops

This patch is backport from previous DOM0(2.6.18) cs: 75e5bfa7fbdc

When an MCE/CMCI error happens (or by polling), the related error
information will be sent to privileged pv-ops domain by XEN. This
patch will help to fetch the xen-logged information by hypercall
and then convert XEN-format log into Linux format MCELOG. It makes
using current available mcelog tools for native Linux possible.

With this patch, after mce/cmci error log information is sent to
pv-ops guest, Running mcelog tools in the guest, you will get same
detailed decoded mce information as in Native Linux.

Signed-off-by: Liping Ke <liping.ke@intel.com>
Signed-off-by: Yunhong Jiang <yunhong.jiang@intel.com>
---
 arch/x86/include/asm/xen/hypercall.h |    8 +
 arch/x86/xen/enlighten.c             |    6 +
 drivers/xen/Kconfig                  |    6 +-
 drivers/xen/Makefile                 |    3 +-
 drivers/xen/mce.c                    |  213 +++++++++++++++++
 include/xen/interface/xen-mca.h      |  429 ++++++++++++++++++++++++++++++++++
 6 files changed, 663 insertions(+), 2 deletions(-)
 create mode 100644 drivers/xen/mce.c
 create mode 100644 include/xen/interface/xen-mca.h

diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h
index 3da450b..42a5601 100644
--- a/arch/x86/include/asm/xen/hypercall.h
+++ b/arch/x86/include/asm/xen/hypercall.h
@@ -46,6 +46,7 @@
 #include <xen/interface/sched.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/platform.h>
+#include <xen/interface/xen-mca.h>
 
 /*
  * The hypercall asms have to meet several constraints:
@@ -307,6 +308,13 @@ HYPERVISOR_dom0_op(struct xen_platform_op *platform_op)
 }
 
 static inline int
+HYPERVISOR_mca(struct xen_mc *mc_op)
+{
+	mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
+	return _hypercall1(int, mca, mc_op);
+}
+
+static inline int
 HYPERVISOR_set_debugreg(int reg, unsigned long value)
 {
 	return _hypercall2(int, set_debugreg, reg, value);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 78970be..634543e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -224,6 +224,12 @@ static __init void xen_init_cpuid_mask(void)
 		  (1 << X86_FEATURE_PAT)  |  /* disable PAT */
 		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
 
+#ifdef CONFIG_XEN_MCE
+	cpuid_leaf1_edx_mask =
+		~((1 << X86_FEATURE_PAT)  |  /* disable PAT */
+		  (1 << X86_FEATURE_ACC));   /* thermal monitoring */
+#endif
+
 	cpuid_leaf81_edx_mask = ~(1 << (X86_FEATURE_GBPAGES % 32));
 
 	if (!xen_initial_domain())
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 3b1c421..c627fee 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -88,6 +88,10 @@ config XEN_SYS_HYPERVISOR
 config XEN_XENBUS_FRONTEND
        tristate
 
+config XEN_MCE
+       def_bool y
+       depends on XEN_DOM0 && X86_64 && X86_MCE_INTEL
+
 config XEN_S3
        def_bool y
-       depends on XEN_DOM0 && ACPI
\ No newline at end of file
+       depends on XEN_DOM0 && ACPI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 386c775..cd7b185 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -9,4 +9,5 @@ obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_S3)			+= acpi.o
\ No newline at end of file
+obj-$(CONFIG_XEN_MCE)	+= mce.o
+obj-$(CONFIG_XEN_S3)			+= acpi.o
diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c
new file mode 100644
index 0000000..b354dc8
--- /dev/null
+++ b/drivers/xen/mce.c
@@ -0,0 +1,213 @@
+/******************************************************************************
+ * mce.c
+ * Add Machine Check event Logging support in DOM0
+ *
+ * Driver for receiving and logging machine check event
+ *
+ * Copyright (c) 2008, 2009 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <xen/interface/xen.h>
+#include <asm/xen/hypervisor.h>
+#include <xen/events.h>
+#include <xen/interface/vcpu.h>
+#include <asm/xen/hypercall.h>
+#include <asm/mce.h>
+
+static mc_info_t *g_mi;
+static mcinfo_logical_cpu_t *g_physinfo;
+static uint32_t ncpus;
+
+static int convert_log(struct mc_info *mi)
+{
+	struct mcinfo_common *mic = NULL;
+	struct mcinfo_global *mc_global;
+	struct mcinfo_bank *mc_bank;
+	struct mce m;
+	int i, found = 0;
+
+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_GLOBAL);
+	WARN_ON(!mic);
+
+	mce_setup(&m);
+	mc_global = (struct mcinfo_global *)mic;
+	m.mcgstatus = mc_global->mc_gstatus;
+	m.apicid = mc_global->mc_apicid;
+	for (i = 0; i < ncpus; i++) {
+		if (g_physinfo[i].mc_apicid == m.apicid) {
+			found = 1;
+			break;
+		}
+	}
+	WARN_ON(!found);
+
+	m.socketid = g_physinfo[i].mc_chipid;
+	m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
+	m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
+	x86_mcinfo_lookup(&mic, mi, MC_TYPE_BANK);
+	do {
+		if (mic == NULL || mic->size == 0)
+			break;
+		if (mic->type == MC_TYPE_BANK) {
+			mc_bank = (struct mcinfo_bank *)mic;
+			m.misc = mc_bank->mc_misc;
+			m.status = mc_bank->mc_status;
+			m.addr = mc_bank->mc_addr;
+			m.tsc = mc_bank->mc_tsc;
+			m.bank = mc_bank->mc_bank;
+			/*log this record*/
+			mce_log(&m);
+		}
+		mic = x86_mcinfo_next(mic);
+	} while (1);
+
+	return 0;
+}
+
+/*pv_ops domain mce virq handler, logging physical mce error info*/
+static irqreturn_t mce_dom_interrupt(int irq, void *dev_id)
+{
+	xen_mc_t mc_op;
+	int result = 0;
+
+	mc_op.cmd = XEN_MC_fetch;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_fetch.data, g_mi);
+urgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_URGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+		goto nonurgent;
+	else {
+		result = convert_log(g_mi);
+		if (result)
+			goto end;
+		/* After fetching the error event log entry from DOM0,
+		 * we need to dec the refcnt and release the entry.
+		 * The entry is reserved and inc refcnt when filling
+		 * the error log entry.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+		goto urgent;
+	}
+nonurgent:
+	mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT;
+	result = HYPERVISOR_mca(&mc_op);
+	if (result || mc_op.u.mc_fetch.flags & XEN_MC_NODATA ||
+			mc_op.u.mc_fetch.flags & XEN_MC_FETCHFAILED)
+		goto end;
+	else {
+		result = convert_log(g_mi);
+		if (result)
+			goto end;
+		/* After fetching the error event log entry from DOM0,
+		 * we need to dec the refcnt and release the entry. The
+		 * entry is reserved and inc refcnt when filling the
+		 * error log entry.
+		 */
+		mc_op.u.mc_fetch.flags = XEN_MC_NONURGENT | XEN_MC_ACK;
+		result = HYPERVISOR_mca(&mc_op);
+		goto nonurgent;
+	}
+end:
+	return IRQ_HANDLED;
+}
+
+static int bind_virq_for_mce(void)
+{
+	int ret;
+	xen_mc_t mc_op;
+
+	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
+
+	if (!g_mi)
+		return -ENOMEM;
+
+	/* Fetch physical CPU Numbers */
+	mc_op.cmd = XEN_MC_physcpuinfo;
+	mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPU numbers\n");
+		kfree(g_mi);
+		return ret;
+	}
+
+	/* Fetch each CPU Physical Info for later reference*/
+	ncpus = mc_op.u.mc_physcpuinfo.ncpus;
+	g_physinfo = kmalloc(sizeof(struct mcinfo_logical_cpu)*ncpus,
+					GFP_KERNEL);
+	if (!g_physinfo) {
+		kfree(g_mi);
+		return -ENOMEM;
+	}
+	set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+	ret = HYPERVISOR_mca(&mc_op);
+	if (ret) {
+		printk(KERN_ERR "MCE_DOM0_LOG: Fail to get physical CPUs info\n");
+		kfree(g_mi);
+		kfree(g_physinfo);
+		return ret;
+	}
+
+	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+		mce_dom_interrupt, 0, "mce", NULL);
+
+	if (ret < 0) {
+		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __init mcelog_init(void)
+{
+	/* Only DOM0 is responsible for MCE logging */
+	if (xen_initial_domain())
+		return bind_virq_for_mce();
+
+	return 0;
+}
+
+
+static void __exit mcelog_cleanup(void)
+{
+	kfree(g_mi);
+	kfree(g_physinfo);
+}
+module_init(mcelog_init);
+module_exit(mcelog_cleanup);
+
+MODULE_LICENSE("GPL");
diff --git a/include/xen/interface/xen-mca.h b/include/xen/interface/xen-mca.h
new file mode 100644
index 0000000..f31fdab
--- /dev/null
+++ b/include/xen/interface/xen-mca.h
@@ -0,0 +1,429 @@
+/******************************************************************************
+ * arch-x86/mca.h
+ *
+ * Contributed by Advanced Micro Devices, Inc.
+ * Author: Christoph Egger <Christoph.Egger@amd.com>
+ *
+ * Guest OS machine check interface to x86 Xen.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/* Full MCA functionality has the following Usecases from the guest side:
+ *
+ * Must have's:
+ * 1. Dom0 and DomU register machine check trap callback handlers
+ *    (already done via "set_trap_table" hypercall)
+ * 2. Dom0 registers machine check event callback handler
+ *    (doable via EVTCHNOP_bind_virq)
+ * 3. Dom0 and DomU fetches machine check data
+ * 4. Dom0 wants Xen to notify a DomU
+ * 5. Dom0 gets DomU ID from physical address
+ * 6. Dom0 wants Xen to kill DomU (already done for "xm destroy")
+ *
+ * Nice to have's:
+ * 7. Dom0 wants Xen to deactivate a physical CPU
+ *    This is better done as separate task, physical CPU hotplugging,
+ *    and hypercall(s) should be sysctl's
+ * 8. Page migration proposed from Xen NUMA work, where Dom0 can tell Xen to
+ *    move a DomU (or Dom0 itself) away from a malicious page
+ *    producing correctable errors.
+ * 9. offlining physical page:
+ *    Xen free's and never re-uses a certain physical page.
+ * 10. Testfacility: Allow Dom0 to write values into machine check MSR's
+ *     and tell Xen to trigger a machine check
+ */
+
+#ifndef __XEN_PUBLIC_ARCH_X86_MCA_H__
+#define __XEN_PUBLIC_ARCH_X86_MCA_H__
+
+/* Hypercall */
+#define __HYPERVISOR_mca __HYPERVISOR_arch_0
+
+/*
+ * The xen-unstable repo has interface version 0x03000001; out interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc003
+
+/* IN: Dom0 calls hypercall to retrieve nonurgent error log entry */
+#define XEN_MC_NONURGENT  0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent error log entry */
+#define XEN_MC_URGENT     0x0002
+/* IN: Dom0 acknowledges previosly-fetched error log entry */
+#define XEN_MC_ACK        0x0004
+
+/* OUT: All is ok */
+#define XEN_MC_OK           0x0
+/* OUT: Domain could not fetch data. */
+#define XEN_MC_FETCHFAILED  0x1
+/* OUT: There was no machine check data to fetch. */
+#define XEN_MC_NODATA       0x2
+/* OUT: Between notification time and this hypercall an other
+ *  (most likely) correctable error happened. The fetched data,
+ *  does not match the original machine check data. */
+#define XEN_MC_NOMATCH      0x4
+
+/* OUT: DomU did not register MC NMI handler. Try something else. */
+#define XEN_MC_CANNOTHANDLE 0x8
+/* OUT: Notifying DomU failed. Retry later or try something else. */
+#define XEN_MC_NOTDELIVERED 0x10
+/* Note, XEN_MC_CANNOTHANDLE and XEN_MC_NOTDELIVERED are mutually exclusive. */
+
+
+#ifndef __ASSEMBLY__
+
+#define VIRQ_MCA VIRQ_ARCH_0 /* G. (DOM0) Machine Check Architecture */
+
+/*
+ * Machine Check Architecure:
+ * structs are read-only and used to report all kinds of
+ * correctable and uncorrectable errors detected by the HW.
+ * Dom0 and DomU: register a handler to get notified.
+ * Dom0 only: Correctable errors are reported via VIRQ_MCA
+ */
+#define MC_TYPE_GLOBAL          0
+#define MC_TYPE_BANK            1
+#define MC_TYPE_EXTENDED        2
+#define MC_TYPE_RECOVERY        3
+
+struct mcinfo_common {
+	uint16_t type;      /* structure type */
+	uint16_t size;      /* size of this struct in bytes */
+};
+
+
+#define MC_FLAG_CORRECTABLE     (1 << 0)
+#define MC_FLAG_UNCORRECTABLE   (1 << 1)
+#define MC_FLAG_RECOVERABLE	(1 << 2)
+#define MC_FLAG_POLLED		(1 << 3)
+#define MC_FLAG_RESET		(1 << 4)
+#define MC_FLAG_CMCI		(1 << 5)
+#define MC_FLAG_MCE		(1 << 6)
+/* contains global x86 mc information */
+struct mcinfo_global {
+	struct mcinfo_common common;
+
+	/* running domain at the time in error (most likely
+	 * the impacted one) */
+	uint16_t mc_domid;
+	uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
+	uint32_t mc_socketid; /* physical socket of the physical core */
+	uint16_t mc_coreid; /* physical impacted core */
+	uint16_t mc_core_threadid; /* core thread of physical core */
+	uint32_t mc_apicid;
+	uint32_t mc_flags;
+	uint64_t mc_gstatus; /* global status */
+};
+
+/* contains bank local x86 mc information */
+struct mcinfo_bank {
+	struct mcinfo_common common;
+
+	uint16_t mc_bank; /* bank nr */
+	uint16_t mc_domid; /* Usecase 5: domain referenced by mc_addr on
+			* privileged pv-ops dom and if mc_addr is valid.
+			* Never valid on DomU. */
+	uint64_t mc_status; /* bank status */
+	uint64_t mc_addr;   /* bank address, only valid
+					 * if addr bit is set in mc_status */
+	uint64_t mc_misc;
+	uint64_t mc_ctrl2;
+	uint64_t mc_tsc;
+};
+
+
+struct mcinfo_msr {
+	uint64_t reg;   /* MSR */
+	uint64_t value; /* MSR value */
+};
+
+/* contains mc information from other
+ * or additional mc MSRs */
+struct mcinfo_extended {
+	struct mcinfo_common common;
+
+	/* You can fill up to five registers.
+	 * If you need more, then use this structure
+	 * multiple times. */
+
+	uint32_t mc_msrs; /* Number of msr with valid values. */
+	/*
+	 * Currently Intel extended MSR (32/64) include all gp registers
+	 * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
+	 * useful at present. So expand this array to 16/32 to leave room.
+	 */
+	struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some case */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different Recovery Action types, if the action is performed successfully,
+ * REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* Below interface used between XEN/DOM0 for passing XEN's recovery action
+ * information to DOM0.
+ * usage Senario: After offlining broken page, XEN might pass its page offline
+ * recovery action result to DOM0. DOM0 will save the information in
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easy broken page earlier when doing next reboot.
+*/
+struct page_offline_action {
+	/* Params for passing the offlined page number to DOM0 */
+	uint64_t mfn;
+	uint64_t status;
+};
+
+struct cpu_offline_action {
+	/* Params for passing the identity of the offlined CPU to DOM0 */
+	uint32_t mc_socketid;
+	uint16_t mc_coreid;
+	uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mcinfo_recovery {
+	struct mcinfo_common common;
+	uint16_t mc_bank; /* bank nr */
+	/* Recovery Action Flags defined above such as REC_ACTION_DONE */
+	uint8_t action_flags;
+	/* Recovery Action types defined above such as MC_ACTION_PAGE_OFFLINE */
+	uint8_t action_types;
+	/* In future if more than one recovery action permitted per error bank,
+	 * a mcinfo_recovery data array will be returned
+	 */
+	union {
+		struct page_offline_action page_retire;
+		struct cpu_offline_action cpu_offline;
+		uint8_t pad[MAX_UNION_SIZE];
+	} action_info;
+};
+
+
+#define MCINFO_HYPERCALLSIZE	1024
+#define MCINFO_MAXSIZE		768
+
+struct mc_info {
+	/* Number of mcinfo_* entries in mi_data */
+	uint32_t mi_nentries;
+	uint32_t _pad0;
+	uint64_t mi_data[(MCINFO_MAXSIZE - 1) / 8];
+};
+typedef struct mc_info mc_info_t;
+DEFINE_GUEST_HANDLE_STRUCT(mc_info);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS	7	/* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX	0	/* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX	1	/* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM	2	/* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX	3	/* Linux-defined */
+#define MC_CAPS_STD_ECX	4	/* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA	5	/* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX	6	/* cpuid level 0x80000001 (%ecx) */
+
+struct mcinfo_logical_cpu {
+	uint32_t mc_cpunr;
+	uint32_t mc_chipid;
+	uint16_t mc_coreid;
+	uint16_t mc_threadid;
+	uint32_t mc_apicid;
+	uint32_t mc_clusterid;
+	uint32_t mc_ncores;
+	uint32_t mc_ncores_active;
+	uint32_t mc_nthreads;
+	int32_t mc_cpuid_level;
+	uint32_t mc_family;
+	uint32_t mc_vendor;
+	uint32_t mc_model;
+	uint32_t mc_step;
+	char mc_vendorid[16];
+	char mc_brandid[64];
+	uint32_t mc_cpu_caps[MC_NCAPS];
+	uint32_t mc_cache_size;
+	uint32_t mc_cache_alignment;
+	int32_t mc_nmsrvals;
+	struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+};
+typedef struct mcinfo_logical_cpu mcinfo_logical_cpu_t;
+DEFINE_GUEST_HANDLE_STRUCT(mcinfo_logical_cpu);
+
+
+/*
+ * OS's should use these instead of writing their own lookup function
+ * each with its own bugs and drawbacks.
+ * We use macros instead of static inline functions to allow guests
+ * to include this header in assembly files (*.S).
+ */
+/* Prototype:
+ *    uint32_t x86_mcinfo_nentries(struct mc_info *mi);
+ */
+#define x86_mcinfo_nentries(_mi)    \
+    ((_mi)->mi_nentries)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    ((struct mcinfo_common *)(_mi)->mi_data)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    ((struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size))
+
+/* Prototype:
+ *    void x86_mcinfo_lookup(void *ret, struct mc_info *mi, uint16_t type);
+ */
+
+static inline void x86_mcinfo_lookup
+	(struct mcinfo_common **ret, struct mc_info *mi, uint16_t type)
+{
+	uint32_t found = 0, i;
+	struct mcinfo_common *mic;
+
+	*ret = NULL;
+	if (!mi)
+		return;
+	mic = x86_mcinfo_first(mi);
+
+	for (i = 0; i < x86_mcinfo_nentries(mi); i++) {
+		if (mic->type == type) {
+			found = 1;
+			break;
+		}
+		mic = x86_mcinfo_next(mic);
+	}
+
+	*ret = found ? mic : NULL;
+}
+/* Usecase 1
+ * Register machine check trap callback handler
+ *    (already done via "set_trap_table" hypercall)
+ */
+
+/* Usecase 2
+ * Dom0 registers machine check event callback handler
+ * done by EVTCHNOP_bind_virq
+ */
+
+/* Usecase 3
+ * Fetch machine check data from hypervisor.
+ * Note, this hypercall is special, because both Dom0 and DomU must use this.
+ */
+#define XEN_MC_fetch            1
+struct xen_mc_fetch {
+    /* IN/OUT variables.
+	 * IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+	 * XEN_MC_ACK if ack'king an earlier fetch
+	 * OUT: XEN_MC_OK, XEN_MC_FETCHAILED,
+	 * XEN_MC_NODATA, XEN_MC_NOMATCH
+	*/
+	uint32_t flags;
+	uint32_t _pad0;
+	/* OUT: id for ack, IN: id we are ack'ing */
+	uint64_t fetch_id;
+
+	/* OUT variables. */
+	GUEST_HANDLE(mc_info) data;
+};
+typedef struct xen_mc_fetch xen_mc_fetch_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_fetch);
+
+
+/* Usecase 4
+ * This tells the hypervisor to notify a DomU about the machine check error
+ */
+#define XEN_MC_notifydomain     2
+struct xen_mc_notifydomain {
+	/* IN variables. */
+	uint16_t mc_domid;/* The unprivileged domain to notify. */
+	uint16_t mc_vcpuid;/* The vcpu in mc_domid to notify.
+			* Usually echo'd value from the fetch hypercall. */
+
+	/* IN/OUT variables. */
+	uint32_t flags;
+
+/* OUT: XEN_MC_OK, XEN_MC_CANNOTHANDLE, XEN_MC_NOTDELIVERED, XEN_MC_NOMATCH */
+};
+typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc_notifydomain);
+
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+	/* IN/OUT */
+	uint32_t ncpus;
+	uint32_t _pad0;
+	/* OUT */
+	GUEST_HANDLE(mcinfo_logical_cpu) info;
+};
+
+#define XEN_MC_msrinject    4
+#define MC_MSRINJ_MAXMSRS       8
+struct xen_mc_msrinject {
+	/* IN */
+	uint32_t mcinj_cpunr;/* target processor id */
+	uint32_t mcinj_flags;/* see MC_MSRINJ_F_* below */
+	uint32_t mcinj_count;/* 0 .. count-1 in array are valid */
+	uint32_t _pad0;
+	struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE   0x1
+
+#define XEN_MC_mceinject    5
+struct xen_mc_mceinject {
+	unsigned int mceinj_cpunr;      /* target processor id */
+};
+
+struct xen_mc {
+	uint32_t cmd;
+	uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
+	union {
+		struct xen_mc_fetch        mc_fetch;
+		struct xen_mc_notifydomain mc_notifydomain;
+		struct xen_mc_physcpuinfo  mc_physcpuinfo;
+		struct xen_mc_msrinject    mc_msrinject;
+		struct xen_mc_mceinject    mc_mceinject;
+	} u;
+};
+typedef struct xen_mc xen_mc_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_mc);
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __XEN_PUBLIC_ARCH_X86_MCA_H__ */
-- 
1.5.1


[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-08-18  9:25                     ` Ke, Liping
@ 2009-08-18 17:51                       ` Jeremy Fitzhardinge
  2009-08-19  1:25                         ` Ke, Liping
  0 siblings, 1 reply; 26+ messages in thread
From: Jeremy Fitzhardinge @ 2009-08-18 17:51 UTC (permalink / raw)
  To: Ke, Liping
  Cc: Kleen, Andi, Christoph Egger, xen-devel@lists.xensource.com,
	Keir Fraser, Jan Beulich

On 08/18/09 02:25, Ke, Liping wrote:
> Hi, Jan
> Yes, the added physinfo fetch op should be placed before
> registering the vIRQ handler. Thanks for pointing out.
> Below is the updated patch.
>   

Please send a delta against the previous patch.

    J

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-08-18 17:51                       ` Jeremy Fitzhardinge
@ 2009-08-19  1:25                         ` Ke, Liping
  2009-08-19  1:31                           ` Ke, Liping
  0 siblings, 1 reply; 26+ messages in thread
From: Ke, Liping @ 2009-08-19  1:25 UTC (permalink / raw)
  To: Jeremy Fitzhardinge
  Cc: Kleen, Andi, Christoph Egger, xen-devel@lists.xensource.com,
	Keir Fraser, Jan Beulich

[-- Attachment #1: Type: text/plain, Size: 1654 bytes --]

Hi, Jeremy
This is the delta patch. Thanks a lot!

Regards,
Criping


>From 1d3152ae0c69a4618bb2137ec5bf150d595de26f Mon Sep 17 00:00:00 2001
From: root <root@lke-ep.sh.intel.com>
Date: Wed, 19 Aug 2009 09:16:22 +0800
Subject: [PATCH] Small fix for MCA Logging in pv-ops

We need to move mce vIRQ handler registration after all
other ops succeeds as Jan points out.

Signed-off-by: Liping Ke<liping.ke@intel.com>
---
 drivers/xen/mce.c |   16 ++++++++--------
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c
index ef838f2..b354dc8 100644
--- a/drivers/xen/mce.c
+++ b/drivers/xen/mce.c
@@ -148,14 +148,6 @@ static int bind_virq_for_mce(void)
 	int ret;
 	xen_mc_t mc_op;
 
-	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
-		mce_dom_interrupt, 0, "mce", NULL);
-
-	if (ret < 0) {
-		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
-		return ret;
-	}
-
 	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
 
 	if (!g_mi)
@@ -189,6 +181,14 @@ static int bind_virq_for_mce(void)
 		return ret;
 	}
 
+	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+		mce_dom_interrupt, 0, "mce", NULL);
+
+	if (ret < 0) {
+		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+		return ret;
+	}
+
 	return 0;
 }
 
-- 
1.5.1





Jeremy Fitzhardinge wrote:
> On 08/18/09 02:25, Ke, Liping wrote:
>> Hi, Jan
>> Yes, the added physinfo fetch op should be placed before
>> registering the vIRQ handler. Thanks for pointing out. Below is the
>> updated patch. 
>> 
> 
> Please send a delta against the previous patch.
> 
>     J


[-- Attachment #2: 0001-Small-fix-for-MCA-Logging-in-pv-ops.patch --]
[-- Type: application/octet-stream, Size: 1210 bytes --]

From 1d3152ae0c69a4618bb2137ec5bf150d595de26f Mon Sep 17 00:00:00 2001
From: root <root@lke-ep.sh.intel.com>
Date: Wed, 19 Aug 2009 09:16:22 +0800
Subject: [PATCH] Small fix for MCA Logging in pv-ops

We need to move mce vIRQ handler registration after all
other ops succeeds as Jan points out.

Signed-off-by: Liping Ke<liping.ke@intel.com>
---
 drivers/xen/mce.c |   16 ++++++++--------
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c
index ef838f2..b354dc8 100644
--- a/drivers/xen/mce.c
+++ b/drivers/xen/mce.c
@@ -148,14 +148,6 @@ static int bind_virq_for_mce(void)
 	int ret;
 	xen_mc_t mc_op;
 
-	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
-		mce_dom_interrupt, 0, "mce", NULL);
-
-	if (ret < 0) {
-		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
-		return ret;
-	}
-
 	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
 
 	if (!g_mi)
@@ -189,6 +181,14 @@ static int bind_virq_for_mce(void)
 		return ret;
 	}
 
+	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+		mce_dom_interrupt, 0, "mce", NULL);
+
+	if (ret < 0) {
+		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+		return ret;
+	}
+
 	return 0;
 }
 
-- 
1.5.1


[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* RE: [pvops-dom0] Adding MCA logging support in  pv_ops
  2009-08-19  1:25                         ` Ke, Liping
@ 2009-08-19  1:31                           ` Ke, Liping
  0 siblings, 0 replies; 26+ messages in thread
From: Ke, Liping @ 2009-08-19  1:31 UTC (permalink / raw)
  To: Ke, Liping, Jeremy Fitzhardinge
  Cc: Christoph Egger, xen-devel@lists.xensource.com, Beulich, Jan,
	Keir Fraser, Kleen, Andi

[-- Attachment #1: Type: text/plain, Size: 3142 bytes --]

Hi, Jeremy

Please use this newer one. Modified contact header.
Thanks & Regards,
Criping



>From 1d3152ae0c69a4618bb2137ec5bf150d595de26f Mon Sep 17 00:00:00 2001
From: Liping Ke <liping.ke@intel.com>
Date: Wed, 19 Aug 2009 09:16:22 +0800
Subject: [PATCH] Small fix for MCA Logging in pv-ops

We need to move mce vIRQ handler registration after all
other ops succeeds as Jan points out.

Signed-off-by: Liping Ke<liping.ke@intel.com>
---
 drivers/xen/mce.c |   16 ++++++++--------
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c
index ef838f2..b354dc8 100644
--- a/drivers/xen/mce.c
+++ b/drivers/xen/mce.c
@@ -148,14 +148,6 @@ static int bind_virq_for_mce(void)
 	int ret;
 	xen_mc_t mc_op;
 
-	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
-		mce_dom_interrupt, 0, "mce", NULL);
-
-	if (ret < 0) {
-		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
-		return ret;
-	}
-
 	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
 
 	if (!g_mi)
@@ -189,6 +181,14 @@ static int bind_virq_for_mce(void)
 		return ret;
 	}
 
+	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+		mce_dom_interrupt, 0, "mce", NULL);
+
+	if (ret < 0) {
+		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+		return ret;
+	}
+
 	return 0;
 }
 
-- 
1.5.1



Ke, Liping wrote:
> Hi, Jeremy
> This is the delta patch. Thanks a lot!
> 
> Regards,
> Criping
> 
> 
>> From 1d3152ae0c69a4618bb2137ec5bf150d595de26f Mon Sep 17 00:00:00
>> 2001 
> From: root <root@lke-ep.sh.intel.com>
> Date: Wed, 19 Aug 2009 09:16:22 +0800
> Subject: [PATCH] Small fix for MCA Logging in pv-ops
> 
> We need to move mce vIRQ handler registration after all
> other ops succeeds as Jan points out.
> 
> Signed-off-by: Liping Ke<liping.ke@intel.com>
> ---
>  drivers/xen/mce.c |   16 ++++++++--------
>  1 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c
> index ef838f2..b354dc8 100644
> --- a/drivers/xen/mce.c
> +++ b/drivers/xen/mce.c
> @@ -148,14 +148,6 @@ static int bind_virq_for_mce(void)
>  	int ret;
>  	xen_mc_t mc_op;
> 
> -	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
> -		mce_dom_interrupt, 0, "mce", NULL);
> -
> -	if (ret < 0) {
> -		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
> -		return ret;
> -	}
> -
>  	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
> 
>  	if (!g_mi)
> @@ -189,6 +181,14 @@ static int bind_virq_for_mce(void)
>  		return ret;
>  	}
> 
> +	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
> +		mce_dom_interrupt, 0, "mce", NULL);
> +
> +	if (ret < 0) {
> +		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
> +		return ret;
> +	}
> +
>  	return 0;
>  }
> 
> 
> Jeremy Fitzhardinge wrote:
>> On 08/18/09 02:25, Ke, Liping wrote:
>>> Hi, Jan
>>> Yes, the added physinfo fetch op should be placed before
>>> registering the vIRQ handler. Thanks for pointing out. Below is the
>>> updated patch. 
>>> 
>> 
>> Please send a delta against the previous patch.
>> 
>>     J


[-- Attachment #2: 0001-Small-fix-for-MCA-Logging-in-pv-ops.patch --]
[-- Type: application/octet-stream, Size: 1210 bytes --]

From 1d3152ae0c69a4618bb2137ec5bf150d595de26f Mon Sep 17 00:00:00 2001
From: Liping Ke <liping.ke@intel.com>
Date: Wed, 19 Aug 2009 09:16:22 +0800
Subject: [PATCH] Small fix for MCA Logging in pv-ops

We need to move mce vIRQ handler registration after all
other ops succeeds as Jan points out.

Signed-off-by: Liping Ke<liping.ke@intel.com>
---
 drivers/xen/mce.c |   16 ++++++++--------
 1 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/xen/mce.c b/drivers/xen/mce.c
index ef838f2..b354dc8 100644
--- a/drivers/xen/mce.c
+++ b/drivers/xen/mce.c
@@ -148,14 +148,6 @@ static int bind_virq_for_mce(void)
 	int ret;
 	xen_mc_t mc_op;
 
-	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
-		mce_dom_interrupt, 0, "mce", NULL);
-
-	if (ret < 0) {
-		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
-		return ret;
-	}
-
 	g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
 
 	if (!g_mi)
@@ -189,6 +181,14 @@ static int bind_virq_for_mce(void)
 		return ret;
 	}
 
+	ret  = bind_virq_to_irqhandler(VIRQ_MCA, 0,
+		mce_dom_interrupt, 0, "mce", NULL);
+
+	if (ret < 0) {
+		printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+		return ret;
+	}
+
 	return 0;
 }
 
-- 
1.5.1


[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply related	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2009-08-19  1:31 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-30  5:15 [pvops-dom0] Adding MCA logging support in pv_ops Ke, Liping
2009-07-30  8:24 ` Jan Beulich
2009-07-30  8:45   ` Ke, Liping
2009-07-30  9:23     ` Ke, Liping
2009-07-30 17:14       ` Jeremy Fitzhardinge
2009-08-05  2:13         ` Ke, Liping
2009-08-05 22:55           ` Jeremy Fitzhardinge
2009-08-07  2:32             ` Ke, Liping
2009-08-07 18:07               ` Jeremy Fitzhardinge
2009-08-10  3:08                 ` Ke, Liping
2009-08-13 20:55                   ` Jeremy Fitzhardinge
2009-08-18  9:02                   ` Jan Beulich
2009-08-18  9:25                     ` Ke, Liping
2009-08-18 17:51                       ` Jeremy Fitzhardinge
2009-08-19  1:25                         ` Ke, Liping
2009-08-19  1:31                           ` Ke, Liping
2009-08-05  2:54         ` Ke, Liping
2009-08-05  2:57           ` Ke, Liping
2009-07-31 15:19   ` Andi Kleen
2009-07-30  9:57 ` Christoph Egger
2009-08-05  1:57   ` Ke, Liping
2009-08-05  9:16     ` Christoph Egger
2009-08-06  1:37       ` Ke, Liping
2009-08-06  7:27         ` Christoph Egger
2009-08-06  8:49           ` Christoph Egger
2009-08-07  1:48             ` Ke, Liping

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.