xen-devel.lists.xenproject.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] tools/xen-mceinj: support AMD
@ 2012-10-05 14:07 Christoph Egger
  2012-10-19 13:10 ` Christoph Egger
  2012-10-29 10:20 ` Jan Beulich
  0 siblings, 2 replies; 16+ messages in thread
From: Christoph Egger @ 2012-10-05 14:07 UTC (permalink / raw)
  To: xen-devel@lists.xen.org

[-- Attachment #1: Type: text/plain, Size: 334 bytes --]


xen-mceinj: Support AMD

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>

-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

[-- Attachment #2: xen_mceinj.diff --]
[-- Type: text/plain, Size: 17460 bytes --]

# User Christoph Egger
# Date 1349437062 -7200
xen mceinj: support AMD.

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>

diff -r 21704bc429b4 -r 1a3eea784e09 tools/tests/mce-test/tools/xen-mceinj.c
--- a/tools/tests/mce-test/tools/xen-mceinj.c
+++ b/tools/tests/mce-test/tools/xen-mceinj.c
@@ -1,6 +1,7 @@
 /*
  * xen-mceinj.c: utilities to inject fake MCE for x86.
  * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2012, AMD Cooperation Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +19,7 @@
  * Authors: Yunhong Jiang <yunhong.jiang@intel.com>
  *          Haicheng Li <haicheng.li@intel.com>
  *          Xudong Hao <xudong.hao@intel.com>
+ *          Christoph Egger <Christoph.Egger@amd.com>
  */
 
 
@@ -44,11 +46,14 @@
 #define MCi_type_STATUS     0x1
 #define MCi_type_ADDR       0x2
 #define MCi_type_MISC       0x3
-#define MCi_type_CTL2       0x4
+#define MC4_type_MISC1      0x4
+#define MC4_type_MISC2      0x5
+#define MC4_type_MISC3      0x6
+#define MCi_type_CTL2       0x7
 
 #define INVALID_MSR         ~0UL
 
-/* Intel MSRs */
+/* X86 machine check MSRs */
 #define MSR_IA32_MCG_CAP         0x00000179
 #define MSR_IA32_MCG_STATUS      0x0000017a
 #define MSR_IA32_MCG_CTL         0x0000017b
@@ -56,35 +61,66 @@
 #define MSR_IA32_MC0_STATUS      0x00000401
 #define MSR_IA32_MC0_ADDR        0x00000402
 #define MSR_IA32_MC0_MISC        0x00000403
+
+/* Intel MSRs */
 #define MSR_IA32_MC0_CTL2        0x00000280
 
-/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */
+/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */
 #define MCG_STATUS_SRAO_LLC_VAL  0x5
 #define MCE_SRAO_LLC_BANK        0x7
 #define MCi_STATUS_SRAO_LLC_VAL  0xBD2000008000017AUL
 #define MCi_MISC_SRAO_LLC_VAL    0x86UL
 
-/* Memory Patrol Scrub SRAO MCE */
+/* Intel: Memory Patrol Scrub SRAO MCE */
 #define MCG_STATUS_SRAO_MEM_VAL  0x5
 #define MCE_SRAO_MEM_BANK        0x8
 #define MCi_STATUS_SRAO_MEM_VAL  0xBD000000004000CFUL
 #define MCi_MISC_SRAO_MEM_VAL    0x86UL
 
-/* LLC EWB UCNA Error */
+/* Intel: LLC EWB UCNA Error */
 #define MCG_STATUS_UCNA_LLC_VAL  0x0
 #define CMCI_UCNA_LLC_BANK       0x9
 #define MCi_STATUS_UCNA_LLC_VAL  0xBC20000080000136UL
 #define MCi_MISC_UCNA_LLC_VAL    0x86UL
 
-/* Error Types */
-#define MCE_SRAO_MEM        0x0
-#define MCE_SRAO_LLC        0x1
-#define CMCI_UCNA_LLC       0x2
+/* Intel: Error Types */
+#define INTEL_MCE_SRAO_MEM        0x0
+#define INTEL_MCE_SRAO_LLC        0x1
+#define INTEL_CMCI_UCNA_LLC       0x2
+
+/* AMD: Memory Error */
+#define MCG_STATUS_MEM_VAL        0x5
+#define MCE_MEM_BANK              0x4
+#define MCi_STATUS_MEM_VAL        0xb4000000001c0100UL
+//#define MCi_STATUS_MEM_VAL        0xb600000000000100UL
+#define MCi_MISC_MEM_VAL          0x0
+
+/* AMD: L3 Cache Error */
+#define MCG_STATUS_L3_VAL         0x5
+#define MCE_L3_BANK               0x4
+#define MCi_STATUS_L3_VAL         0xbc000400001c010bULL
+#define MC4_MISC0_VAL             0x0
+#define MC4_MISC1_VAL             0x0
+#define MC4_MISC2_L3_VAL          0xc008000000000003ULL
+
+/* AMD: CPU corruption error */
+#define MCG_STATUS_CPU_VAL        0x5
+#define MCE_CPU_BANK              0x2
+#define MCi_STATUS_CPU_VAL        0x9200000000000000ULL
+//#define MCi_STATUS_CPU_VAL        0xb200000000000000ULL
+
+/* AMD: Error Types */
+#define AMD_MCE_MEM               0x20 /* memory error */
+#define AMD_MCE_L3                0x21 /* l3 cache */
 
 #define LOGFILE stdout
 
 int dump;
+int opt_exception;
 struct xen_mc_msrinject msr_inj;
+int cpu_is_amd;
+int cpu_is_intel;
+
 
 static void Lprintf(const char *fmt, ...)
 {
@@ -145,7 +181,7 @@ static int mca_cpuinfo(xc_interface *xc_
         return 0;
 }
 
-static int inject_cmci(xc_interface *xc_handle, int cpu_nr)
+static int intel_inject_cmci(xc_interface *xc_handle)
 {
     struct xen_mc mc;
     int nr_cpus;
@@ -191,6 +227,15 @@ static uint64_t bank_addr(int bank, int 
         case MCi_type_MISC:
             addr = MSR_IA32_MC0_CTL + (bank * 4) + type;
             break;
+        case MC4_type_MISC1:
+            addr = 0xc0000408;
+            break;
+        case MC4_type_MISC2:
+            addr = 0xc0000409;
+            break;
+        case MC4_type_MISC3:
+            addr = 0xc000040a;
+            break;
         case MCi_type_CTL2:
             addr = MSR_IA32_MC0_CTL2 + bank;
             break;
@@ -356,12 +401,11 @@ static int inject_mci_status(xc_interfac
 }
 
 static int inject_mci_misc(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint64_t bank,
-                             uint64_t val)
+                             uint32_t cpu_nr, uint32_t misctype,
+                             uint64_t bank, uint64_t val)
 {
     return add_msr_bank_intpose(xc_handle, cpu_nr, MC_MSRINJ_F_INTERPOSE,
-                                    MCi_type_MISC, bank, val); 
+                                    MCi_type_MISC + misctype, bank, val); 
 }
 
 static int inject_mci_addr(xc_interface *xc_handle,
@@ -373,10 +417,8 @@ static int inject_mci_addr(xc_interface 
                                     MCi_type_ADDR, bank, val); 
 }
 
-static int inject_llc_srao(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_llc_srao(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -390,7 +432,7 @@ static int inject_llc_srao(xc_interface 
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           MCE_SRAO_LLC_BANK, MCi_MISC_SRAO_LLC_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -407,17 +449,17 @@ static int inject_llc_srao(xc_interface 
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_mce(xc_handle, cpu_nr);
-    if ( ret )
-        err(xc_handle, "Failed to inject MCE error\n");
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
 
     return 0;
 }
 
-static int inject_mem_srao(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_mem_srao(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -431,7 +473,7 @@ static int inject_mem_srao(xc_interface 
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           MCE_SRAO_MEM_BANK, MCi_MISC_SRAO_MEM_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -448,17 +490,17 @@ static int inject_mem_srao(xc_interface 
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_mce(xc_handle, cpu_nr);
-    if ( ret )
-        err(xc_handle, "Failed to inject MCE error\n");
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
 
     return 0;
 }
 
-static int inject_llc_ucna(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_llc_ucna(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -472,7 +514,7 @@ static int inject_llc_ucna(xc_interface 
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           CMCI_UCNA_LLC_BANK, MCi_MISC_UCNA_LLC_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -489,13 +531,108 @@ static int inject_llc_ucna(xc_interface 
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_cmci(xc_handle, cpu_nr);
+    ret = intel_inject_cmci(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MCE error\n");
 
     return 0;
 }
 
+static int amd_inject_mem(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
+{
+    uint64_t gpfn, mfn, haddr;
+    int ret = 0;
+
+    ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCG_STATUS MSR\n");
+
+    ret = inject_mci_status(xc_handle, cpu_nr,
+                            MCE_MEM_BANK, MCi_STATUS_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
+                          MCE_MEM_BANK, MCi_MISC_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_MISC MSR\n");
+
+    gpfn = gaddr >> PAGE_SHIFT;
+    mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn);
+    if (!mfn_valid(mfn))
+        err(xc_handle, "The MFN is not valid\n");
+    haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1));
+    ret = inject_mci_addr(xc_handle, cpu_nr, MCE_MEM_BANK, haddr);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_ADDR MSR\n");
+
+    ret = flush_msr_inj(xc_handle);
+    if ( ret )
+        err(xc_handle, "Failed to inject MSR\n");
+
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
+
+    return 0;
+}
+
+static int amd_inject_l3(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
+{
+    uint64_t gpfn, mfn, haddr;
+    int ret = 0;
+
+    ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCG_STATUS MSR\n");
+
+    ret = inject_mci_status(xc_handle, cpu_nr,
+                            MCE_L3_BANK, MCi_STATUS_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
+                          MCE_L3_BANK, MC4_MISC0_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC0 MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 1,
+                          MCE_L3_BANK, MC4_MISC1_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC1 MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 2,
+                          MCE_L3_BANK, MC4_MISC2_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC2 MSR\n");
+
+    gpfn = gaddr >> PAGE_SHIFT;
+    mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn);
+    if (!mfn_valid(mfn))
+        err(xc_handle, "The MFN is not valid\n");
+    haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1));
+    ret = inject_mci_addr(xc_handle, cpu_nr, MCE_L3_BANK, haddr);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_ADDR MSR\n");
+
+    ret = flush_msr_inj(xc_handle);
+    if ( ret )
+        err(xc_handle, "Failed to inject MSR\n");
+
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
+
+    return 0;
+}
+
+
 static long xs_get_dom_mem(int domid)
 {
     char path[128];
@@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid)
     if (!xs)
         return -1;
 
-    sprintf(path, "/local/domain/%d/memory/target", domid);
+    snprintf(path, sizeof(path), "/local/domain/%d/memory/target", domid);
     memstr = xs_read(xs, XBT_NULL, path, &plen);
     xs_daemon_close(xs);
 
@@ -540,30 +677,80 @@ static void help(void)
            "  -D, --dump           dump addr info without error injection\n"
            "  -c, --cpu=CPU_ID     target CPU\n"
            "  -d, --domain=DomID   target domain, the default is Xen itself\n"
+           "  -e                   raise MCE exception\n"
            "  -h, --help           print this page\n"
            "  -p, --phyaddr        physical address\n"
            "  -t, --type=error     error type\n"
-           "                        0 : MCE_SRAO_MEM\n"
-           "                        1 : MCE_SRAO_LLC\n"
-           "                        2 : CMCI_UCNA_LLC\n"
+           "                        0x0 : MCE_SRAO_MEM (Intel only)\n"
+           "                        0x1 : MCE_SRAO_LLC (Intel only)\n"
+           "                        0x2 : CMCI_UCNA_LLC (Intel only)\n"
+           "                        0x20: DRAM error (AMD only)\n"
+           "                        0x21: L3 cache error (AMD only)\n"
            "\n"
            );
 }
 
+static void cpuid(const unsigned int *input, unsigned int *regs)
+{
+    unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
+    asm (
+#ifdef __i386__
+        "push %%ebx; push %%edx\n\t"
+#else
+        "push %%rbx; push %%rdx\n\t"
+#endif
+        "cpuid\n\t"
+        "mov %%ebx,4(%4)\n\t"
+        "mov %%edx,12(%4)\n\t"
+#ifdef __i386__
+        "pop %%edx; pop %%ebx\n\t"
+#else
+        "pop %%rdx; pop %%rbx\n\t"
+#endif
+        : "=a" (regs[0]), "=c" (regs[2])
+        : "0" (input[0]), "1" (count), "S" (regs)
+        : "memory" );
+}
+
+/* Get the manufacturer brand name of the host processor. */
+static void cpuid_brand_get(char *str)
+{
+    unsigned int input[2] = { 0, 0 };
+    unsigned int regs[4];
+
+    cpuid(input, regs);
+
+    *(uint32_t *)(str + 0) = regs[1];
+    *(uint32_t *)(str + 4) = regs[3];
+    *(uint32_t *)(str + 8) = regs[2];
+    str[12] = '\0';
+}
+
 int main(int argc, char *argv[])
 {
-    int type = MCE_SRAO_MEM;
+    int type;
     int c, opt_index;
     uint32_t domid;
     xc_interface *xc_handle;
-    int cpu_nr;
-    int64_t gaddr, gpfn, mfn, haddr, max_gpa;
+    unsigned int cpu_nr;
+    uint64_t gaddr, gpfn, mfn, haddr, max_gpa;
+    char cpu_brand[13];
 
     /* Default Value */
     domid = DOMID_XEN;
     gaddr = 0x180020;
     cpu_nr = 0;
 
+    cpu_is_amd = cpu_is_intel = 0;
+    cpuid_brand_get(cpu_brand);
+    if (strstr(cpu_brand, "AMD"))
+        cpu_is_amd = 1;
+    else
+        cpu_is_intel = 1;
+
+    if (cpu_is_intel)
+        type = INTEL_MCE_SRAO_MEM;
+
     init_msr_inj();
     xc_handle = xc_interface_open(0, 0, 0);
     if ( !xc_handle ) {
@@ -571,8 +758,8 @@ int main(int argc, char *argv[])
         exit(EXIT_FAILURE);
     }
 
-    while ( 1 ) {
-        c = getopt_long(argc, argv, "c:Dd:t:hp:r", opts, &opt_index);
+    for (;;) {
+        c = getopt_long(argc, argv, "c:Dd:t:hp:r:e", opts, &opt_index);
         if ( c == -1 )
             break;
         switch ( c ) {
@@ -580,23 +767,26 @@ int main(int argc, char *argv[])
             dump=1;
             break;
         case 'c':
-            cpu_nr = strtol(optarg, &optarg, 10);
+            cpu_nr = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input a digit parameter for CPU\n");
             break;
         case 'd':
-            domid = strtol(optarg, &optarg, 10);
+            domid = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input a digit parameter for domain\n");
             break;
         case 'p':
-            gaddr = strtol(optarg, &optarg, 0);
+            gaddr = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input correct page address\n");
             break;
         case 't':
             type = strtol(optarg, NULL, 0);
             break;
+        case 'e':
+            opt_exception = 1;
+            break;
         case 'h':
         default:
             help();
@@ -627,16 +817,26 @@ int main(int argc, char *argv[])
         goto out;
     }
 
-    switch ( type )
-    {
-    case MCE_SRAO_MEM:
-        inject_mem_srao(xc_handle, cpu_nr, domid, gaddr);
+    switch ( type ) {
+    case INTEL_MCE_SRAO_MEM:
+        if ( cpu_is_intel )
+            intel_inject_mem_srao(xc_handle, cpu_nr, domid, gaddr);
         break;
-    case MCE_SRAO_LLC:
-        inject_llc_srao(xc_handle, cpu_nr, domid, gaddr);
+    case INTEL_MCE_SRAO_LLC:
+        if ( cpu_is_intel )
+            intel_inject_llc_srao(xc_handle, cpu_nr, domid, gaddr);
         break;
-    case CMCI_UCNA_LLC:
-        inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr);
+    case INTEL_CMCI_UCNA_LLC:
+        if ( cpu_is_intel )
+            intel_inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr);
+        break;
+    case AMD_MCE_MEM:
+        if ( cpu_is_amd )
+            amd_inject_mem(xc_handle, cpu_nr, domid, gaddr);
+        break;
+    case AMD_MCE_L3:
+        if ( cpu_is_amd )
+            amd_inject_l3(xc_handle, cpu_nr, domid, gaddr);
         break;
     default:
         err(xc_handle, "Unsupported error type\n");

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2012-10-05 14:07 Christoph Egger
@ 2012-10-19 13:10 ` Christoph Egger
  2012-10-19 14:58   ` Jan Beulich
  2012-10-29 10:20 ` Jan Beulich
  1 sibling, 1 reply; 16+ messages in thread
From: Christoph Egger @ 2012-10-19 13:10 UTC (permalink / raw)
  To: xen-devel@lists.xen.org


Ping?

On 10/05/12 16:07, Christoph Egger wrote:
> 
> xen-mceinj: Support AMD
> 
> Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
> 


-- 
---to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Einsteinring 24, 85689 Dornach b. Muenchen
Geschaeftsfuehrer: Alberto Bozzo
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2012-10-19 13:10 ` Christoph Egger
@ 2012-10-19 14:58   ` Jan Beulich
  2012-10-19 15:01     ` Ian Jackson
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2012-10-19 14:58 UTC (permalink / raw)
  To: Christoph Egger
  Cc: Keir Fraser, Ian Jackson, Ian Campbell, xen-devel@lists.xen.org

>>> On 19.10.12 at 15:10, Christoph Egger <Christoph.Egger@amd.com> wrote:
> Ping?

I'm afraid it's not really clear who should commit this - it's tools
side code, so IanJ or IanC would normally be the ones, but otoh
it's code requiring low level hardware knowledge to review the
patch, so both of them might want to rather not do the review.
In the past it was usually Keir who eventually committed such
patches, but I don't know whether he put this on his to-look-at-
and-eventually-commit list.

Jan

> On 10/05/12 16:07, Christoph Egger wrote:
>> 
>> xen-mceinj: Support AMD
>> 
>> Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>
>> 
> 
> 
> -- 
> ---to satisfy European Law for business letters:
> Advanced Micro Devices GmbH
> Einsteinring 24, 85689 Dornach b. Muenchen
> Geschaeftsfuehrer: Alberto Bozzo
> Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
> Registergericht Muenchen, HRB Nr. 43632
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org 
> http://lists.xen.org/xen-devel 

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2012-10-19 14:58   ` Jan Beulich
@ 2012-10-19 15:01     ` Ian Jackson
  2012-10-19 15:05       ` Jan Beulich
  0 siblings, 1 reply; 16+ messages in thread
From: Ian Jackson @ 2012-10-19 15:01 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Christoph Egger, Keir (Xen.org), Ian Campbell,
	xen-devel@lists.xen.org

Jan Beulich writes ("Re: [Xen-devel] [PATCH] tools/xen-mceinj: support AMD"):
> >>> On 19.10.12 at 15:10, Christoph Egger <Christoph.Egger@amd.com> wrote:
> > Ping?
> 
> I'm afraid it's not really clear who should commit this - it's tools
> side code, so IanJ or IanC would normally be the ones, but otoh
> it's code requiring low level hardware knowledge to review the
> patch, so both of them might want to rather not do the review.
> In the past it was usually Keir who eventually committed such
> patches, but I don't know whether he put this on his to-look-at-
> and-eventually-commit list.

My view is that I would like an ack from someone who understands
what's going on ...

Ian.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2012-10-19 15:01     ` Ian Jackson
@ 2012-10-19 15:05       ` Jan Beulich
  2012-11-12 16:25         ` Ian Campbell
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2012-10-19 15:05 UTC (permalink / raw)
  To: Ian Jackson
  Cc: Christoph Egger, Keir (Xen.org), Ian Campbell,
	xen-devel@lists.xen.org

>>> On 19.10.12 at 17:01, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:
> Jan Beulich writes ("Re: [Xen-devel] [PATCH] tools/xen-mceinj: support AMD"):
>> >>> On 19.10.12 at 15:10, Christoph Egger <Christoph.Egger@amd.com> wrote:
>> > Ping?
>> 
>> I'm afraid it's not really clear who should commit this - it's tools
>> side code, so IanJ or IanC would normally be the ones, but otoh
>> it's code requiring low level hardware knowledge to review the
>> patch, so both of them might want to rather not do the review.
>> In the past it was usually Keir who eventually committed such
>> patches, but I don't know whether he put this on his to-look-at-
>> and-eventually-commit list.
> 
> My view is that I would like an ack from someone who understands
> what's going on ...

Which would ideally be those who introduced the code, i.e.
Intel folks if I'm not mistaken...

Jan

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2012-10-05 14:07 Christoph Egger
  2012-10-19 13:10 ` Christoph Egger
@ 2012-10-29 10:20 ` Jan Beulich
  1 sibling, 0 replies; 16+ messages in thread
From: Jan Beulich @ 2012-10-29 10:20 UTC (permalink / raw)
  To: Christoph Egger; +Cc: xen-devel@lists.xen.org

>>> On 05.10.12 at 16:07, Christoph Egger <Christoph.Egger@amd.com> wrote:
> #define LOGFILE stdout
> 
> int dump;
>+int opt_exception;
> struct xen_mc_msrinject msr_inj;
>+int cpu_is_amd;
>+int cpu_is_intel;

Albeit I realize that this isn't the case with the context code here,
let's not continue bad habits: The newly added variables should
be static and - as long as not precluded by their use - bool.

>+
> 
> static void Lprintf(const char *fmt, ...)
> {
>...
>         case MCi_type_MISC:
>             addr = MSR_IA32_MC0_CTL + (bank * 4) + type;
>             break;
>+        case MC4_type_MISC1:
>+            addr = 0xc0000408;
>+            break;
>+        case MC4_type_MISC2:
>+            addr = 0xc0000409;
>+            break;
>+        case MC4_type_MISC3:
>+            addr = 0xc000040a;
>+            break;
>         case MCi_type_CTL2:
>             addr = MSR_IA32_MC0_CTL2 + bank;
>             break;

Why is only bank 4 being added here? This question also
applies to the hypervisor-side patch you sent on Friday.

>-    sprintf(path, "/local/domain/%d/memory/target", domid);
>+    snprintf(path, sizeof(path), "/local/domain/%d/memory/target", domid);
 
While fine with me, this is completely unrelated.

>+static void cpuid(const unsigned int *input, unsigned int *regs)

While it makes no difference to the treatment of the parameter or
the generated code, this should still be "unsigned int regs[4]" for
documentation purposes.

>+{
>+    unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
>+    asm (
>+#ifdef __i386__
>+        "push %%ebx; push %%edx\n\t"
>+#else
>+        "push %%rbx; push %%rdx\n\t"
>+#endif
>+        "cpuid\n\t"
>+        "mov %%ebx,4(%4)\n\t"
>+        "mov %%edx,12(%4)\n\t"
>+#ifdef __i386__
>+        "pop %%edx; pop %%ebx\n\t"
>+#else
>+        "pop %%rdx; pop %%rbx\n\t"
>+#endif
>+        : "=a" (regs[0]), "=c" (regs[2])
>+        : "0" (input[0]), "1" (count), "S" (regs)
>+        : "memory" );
>+}

What did you clone this from? It re-introduces a bug long fixed in
libxc (use of push/pop here collides with the 64-bit red zone; see
tools/libxc/xc_cpuid_x86.c:cpuid() and its history).

>+static void cpuid_brand_get(char *str)
>+{
>+    unsigned int input[2] = { 0, 0 };
>+    unsigned int regs[4];
>+
>+    cpuid(input, regs);
>+
>+    *(uint32_t *)(str + 0) = regs[1];
>+    *(uint32_t *)(str + 4) = regs[3];
>+    *(uint32_t *)(str + 8) = regs[2];
>+    str[12] = '\0';
>+}

I believe that by way of using a suitably defined union you can
get away here without any type casts (which I would generally
expect the compiler to warn about, as I think [hope] that the
tools don't get built with -fno-strict-aliasing).

Also, the file uses hypervisor coding style, so please follow that
in the adjustments you are doing (and in particular please don't
do any adjustments just to _remove_ that coding style).

>-    int type = MCE_SRAO_MEM;
>+    int type;
>...
>+    if (cpu_is_intel)
>+        type = INTEL_MCE_SRAO_MEM;

So on AMD "type" remains uninitialized unless the -t option was
used?

Jan

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2012-10-19 15:05       ` Jan Beulich
@ 2012-11-12 16:25         ` Ian Campbell
  2012-11-13  1:40           ` Hao, Xudong
  0 siblings, 1 reply; 16+ messages in thread
From: Ian Campbell @ 2012-11-12 16:25 UTC (permalink / raw)
  To: Jan Beulich
  Cc: Jinsong, Haicheng Li, Christoph Egger, Keir (Xen.org), XudongHao,
	YunhongJiang, Ian Jackson, xen-devel@lists.xen.org

On Fri, 2012-10-19 at 16:05 +0100, Jan Beulich wrote:
> >>> On 19.10.12 at 17:01, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:
> > Jan Beulich writes ("Re: [Xen-devel] [PATCH] tools/xen-mceinj: support AMD"):
> >> >>> On 19.10.12 at 15:10, Christoph Egger <Christoph.Egger@amd.com> wrote:
> >> > Ping?
> >> 
> >> I'm afraid it's not really clear who should commit this - it's tools
> >> side code, so IanJ or IanC would normally be the ones, but otoh
> >> it's code requiring low level hardware knowledge to review the
> >> patch, so both of them might want to rather not do the review.
> >> In the past it was usually Keir who eventually committed such
> >> patches, but I don't know whether he put this on his to-look-at-
> >> and-eventually-commit list.
> > 
> > My view is that I would like an ack from someone who understands
> > what's going on ...
> 
> Which would ideally be those who introduced the code, i.e.
> Intel folks if I'm not mistaken...

Let's CC some of them then.

Intel folks -- any opinion on the patch below from Christoph?

8<----------------

# User Christoph Egger
# Date 1349437062 -7200
xen mceinj: support AMD.

Signed-off-by: Christoph Egger <Christoph.Egger@amd.com>

diff -r 21704bc429b4 -r 1a3eea784e09 tools/tests/mce-test/tools/xen-mceinj.c
--- a/tools/tests/mce-test/tools/xen-mceinj.c
+++ b/tools/tests/mce-test/tools/xen-mceinj.c
@@ -1,6 +1,7 @@
 /*
  * xen-mceinj.c: utilities to inject fake MCE for x86.
  * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2012, AMD Cooperation Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +19,7 @@
  * Authors: Yunhong Jiang <yunhong.jiang@intel.com>
  *          Haicheng Li <haicheng.li@intel.com>
  *          Xudong Hao <xudong.hao@intel.com>
+ *          Christoph Egger <Christoph.Egger@amd.com>
  */
 
 
@@ -44,11 +46,14 @@
 #define MCi_type_STATUS     0x1
 #define MCi_type_ADDR       0x2
 #define MCi_type_MISC       0x3
-#define MCi_type_CTL2       0x4
+#define MC4_type_MISC1      0x4
+#define MC4_type_MISC2      0x5
+#define MC4_type_MISC3      0x6
+#define MCi_type_CTL2       0x7
 
 #define INVALID_MSR         ~0UL
 
-/* Intel MSRs */
+/* X86 machine check MSRs */
 #define MSR_IA32_MCG_CAP         0x00000179
 #define MSR_IA32_MCG_STATUS      0x0000017a
 #define MSR_IA32_MCG_CTL         0x0000017b
@@ -56,35 +61,66 @@
 #define MSR_IA32_MC0_STATUS      0x00000401
 #define MSR_IA32_MC0_ADDR        0x00000402
 #define MSR_IA32_MC0_MISC        0x00000403
+
+/* Intel MSRs */
 #define MSR_IA32_MC0_CTL2        0x00000280
 
-/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */
+/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */
 #define MCG_STATUS_SRAO_LLC_VAL  0x5
 #define MCE_SRAO_LLC_BANK        0x7
 #define MCi_STATUS_SRAO_LLC_VAL  0xBD2000008000017AUL
 #define MCi_MISC_SRAO_LLC_VAL    0x86UL
 
-/* Memory Patrol Scrub SRAO MCE */
+/* Intel: Memory Patrol Scrub SRAO MCE */
 #define MCG_STATUS_SRAO_MEM_VAL  0x5
 #define MCE_SRAO_MEM_BANK        0x8
 #define MCi_STATUS_SRAO_MEM_VAL  0xBD000000004000CFUL
 #define MCi_MISC_SRAO_MEM_VAL    0x86UL
 
-/* LLC EWB UCNA Error */
+/* Intel: LLC EWB UCNA Error */
 #define MCG_STATUS_UCNA_LLC_VAL  0x0
 #define CMCI_UCNA_LLC_BANK       0x9
 #define MCi_STATUS_UCNA_LLC_VAL  0xBC20000080000136UL
 #define MCi_MISC_UCNA_LLC_VAL    0x86UL
 
-/* Error Types */
-#define MCE_SRAO_MEM        0x0
-#define MCE_SRAO_LLC        0x1
-#define CMCI_UCNA_LLC       0x2
+/* Intel: Error Types */
+#define INTEL_MCE_SRAO_MEM        0x0
+#define INTEL_MCE_SRAO_LLC        0x1
+#define INTEL_CMCI_UCNA_LLC       0x2
+
+/* AMD: Memory Error */
+#define MCG_STATUS_MEM_VAL        0x5
+#define MCE_MEM_BANK              0x4
+#define MCi_STATUS_MEM_VAL        0xb4000000001c0100UL
+//#define MCi_STATUS_MEM_VAL        0xb600000000000100UL
+#define MCi_MISC_MEM_VAL          0x0
+
+/* AMD: L3 Cache Error */
+#define MCG_STATUS_L3_VAL         0x5
+#define MCE_L3_BANK               0x4
+#define MCi_STATUS_L3_VAL         0xbc000400001c010bULL
+#define MC4_MISC0_VAL             0x0
+#define MC4_MISC1_VAL             0x0
+#define MC4_MISC2_L3_VAL          0xc008000000000003ULL
+
+/* AMD: CPU corruption error */
+#define MCG_STATUS_CPU_VAL        0x5
+#define MCE_CPU_BANK              0x2
+#define MCi_STATUS_CPU_VAL        0x9200000000000000ULL
+//#define MCi_STATUS_CPU_VAL        0xb200000000000000ULL
+
+/* AMD: Error Types */
+#define AMD_MCE_MEM               0x20 /* memory error */
+#define AMD_MCE_L3                0x21 /* l3 cache */
 
 #define LOGFILE stdout
 
 int dump;
+int opt_exception;
 struct xen_mc_msrinject msr_inj;
+int cpu_is_amd;
+int cpu_is_intel;
+
 
 static void Lprintf(const char *fmt, ...)
 {
@@ -145,7 +181,7 @@ static int mca_cpuinfo(xc_interface *xc_
         return 0;
 }
 
-static int inject_cmci(xc_interface *xc_handle, int cpu_nr)
+static int intel_inject_cmci(xc_interface *xc_handle)
 {
     struct xen_mc mc;
     int nr_cpus;
@@ -191,6 +227,15 @@ static uint64_t bank_addr(int bank, int 
         case MCi_type_MISC:
             addr = MSR_IA32_MC0_CTL + (bank * 4) + type;
             break;
+        case MC4_type_MISC1:
+            addr = 0xc0000408;
+            break;
+        case MC4_type_MISC2:
+            addr = 0xc0000409;
+            break;
+        case MC4_type_MISC3:
+            addr = 0xc000040a;
+            break;
         case MCi_type_CTL2:
             addr = MSR_IA32_MC0_CTL2 + bank;
             break;
@@ -356,12 +401,11 @@ static int inject_mci_status(xc_interfac
 }
 
 static int inject_mci_misc(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint64_t bank,
-                             uint64_t val)
+                             uint32_t cpu_nr, uint32_t misctype,
+                             uint64_t bank, uint64_t val)
 {
     return add_msr_bank_intpose(xc_handle, cpu_nr, MC_MSRINJ_F_INTERPOSE,
-                                    MCi_type_MISC, bank, val); 
+                                    MCi_type_MISC + misctype, bank, val); 
 }
 
 static int inject_mci_addr(xc_interface *xc_handle,
@@ -373,10 +417,8 @@ static int inject_mci_addr(xc_interface 
                                     MCi_type_ADDR, bank, val); 
 }
 
-static int inject_llc_srao(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_llc_srao(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -390,7 +432,7 @@ static int inject_llc_srao(xc_interface 
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           MCE_SRAO_LLC_BANK, MCi_MISC_SRAO_LLC_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -407,17 +449,17 @@ static int inject_llc_srao(xc_interface 
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_mce(xc_handle, cpu_nr);
-    if ( ret )
-        err(xc_handle, "Failed to inject MCE error\n");
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
 
     return 0;
 }
 
-static int inject_mem_srao(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_mem_srao(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -431,7 +473,7 @@ static int inject_mem_srao(xc_interface 
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           MCE_SRAO_MEM_BANK, MCi_MISC_SRAO_MEM_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -448,17 +490,17 @@ static int inject_mem_srao(xc_interface 
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_mce(xc_handle, cpu_nr);
-    if ( ret )
-        err(xc_handle, "Failed to inject MCE error\n");
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
 
     return 0;
 }
 
-static int inject_llc_ucna(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_llc_ucna(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -472,7 +514,7 @@ static int inject_llc_ucna(xc_interface 
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           CMCI_UCNA_LLC_BANK, MCi_MISC_UCNA_LLC_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -489,13 +531,108 @@ static int inject_llc_ucna(xc_interface 
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_cmci(xc_handle, cpu_nr);
+    ret = intel_inject_cmci(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MCE error\n");
 
     return 0;
 }
 
+static int amd_inject_mem(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
+{
+    uint64_t gpfn, mfn, haddr;
+    int ret = 0;
+
+    ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCG_STATUS MSR\n");
+
+    ret = inject_mci_status(xc_handle, cpu_nr,
+                            MCE_MEM_BANK, MCi_STATUS_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
+                          MCE_MEM_BANK, MCi_MISC_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_MISC MSR\n");
+
+    gpfn = gaddr >> PAGE_SHIFT;
+    mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn);
+    if (!mfn_valid(mfn))
+        err(xc_handle, "The MFN is not valid\n");
+    haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1));
+    ret = inject_mci_addr(xc_handle, cpu_nr, MCE_MEM_BANK, haddr);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_ADDR MSR\n");
+
+    ret = flush_msr_inj(xc_handle);
+    if ( ret )
+        err(xc_handle, "Failed to inject MSR\n");
+
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
+
+    return 0;
+}
+
+static int amd_inject_l3(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
+{
+    uint64_t gpfn, mfn, haddr;
+    int ret = 0;
+
+    ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCG_STATUS MSR\n");
+
+    ret = inject_mci_status(xc_handle, cpu_nr,
+                            MCE_L3_BANK, MCi_STATUS_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
+                          MCE_L3_BANK, MC4_MISC0_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC0 MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 1,
+                          MCE_L3_BANK, MC4_MISC1_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC1 MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 2,
+                          MCE_L3_BANK, MC4_MISC2_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC2 MSR\n");
+
+    gpfn = gaddr >> PAGE_SHIFT;
+    mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn);
+    if (!mfn_valid(mfn))
+        err(xc_handle, "The MFN is not valid\n");
+    haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1));
+    ret = inject_mci_addr(xc_handle, cpu_nr, MCE_L3_BANK, haddr);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_ADDR MSR\n");
+
+    ret = flush_msr_inj(xc_handle);
+    if ( ret )
+        err(xc_handle, "Failed to inject MSR\n");
+
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
+
+    return 0;
+}
+
+
 static long xs_get_dom_mem(int domid)
 {
     char path[128];
@@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid)
     if (!xs)
         return -1;
 
-    sprintf(path, "/local/domain/%d/memory/target", domid);
+    snprintf(path, sizeof(path), "/local/domain/%d/memory/target", domid);
     memstr = xs_read(xs, XBT_NULL, path, &plen);
     xs_daemon_close(xs);
 
@@ -540,30 +677,80 @@ static void help(void)
            "  -D, --dump           dump addr info without error injection\n"
            "  -c, --cpu=CPU_ID     target CPU\n"
            "  -d, --domain=DomID   target domain, the default is Xen itself\n"
+           "  -e                   raise MCE exception\n"
            "  -h, --help           print this page\n"
            "  -p, --phyaddr        physical address\n"
            "  -t, --type=error     error type\n"
-           "                        0 : MCE_SRAO_MEM\n"
-           "                        1 : MCE_SRAO_LLC\n"
-           "                        2 : CMCI_UCNA_LLC\n"
+           "                        0x0 : MCE_SRAO_MEM (Intel only)\n"
+           "                        0x1 : MCE_SRAO_LLC (Intel only)\n"
+           "                        0x2 : CMCI_UCNA_LLC (Intel only)\n"
+           "                        0x20: DRAM error (AMD only)\n"
+           "                        0x21: L3 cache error (AMD only)\n"
            "\n"
            );
 }
 
+static void cpuid(const unsigned int *input, unsigned int *regs)
+{
+    unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
+    asm (
+#ifdef __i386__
+        "push %%ebx; push %%edx\n\t"
+#else
+        "push %%rbx; push %%rdx\n\t"
+#endif
+        "cpuid\n\t"
+        "mov %%ebx,4(%4)\n\t"
+        "mov %%edx,12(%4)\n\t"
+#ifdef __i386__
+        "pop %%edx; pop %%ebx\n\t"
+#else
+        "pop %%rdx; pop %%rbx\n\t"
+#endif
+        : "=a" (regs[0]), "=c" (regs[2])
+        : "0" (input[0]), "1" (count), "S" (regs)
+        : "memory" );
+}
+
+/* Get the manufacturer brand name of the host processor. */
+static void cpuid_brand_get(char *str)
+{
+    unsigned int input[2] = { 0, 0 };
+    unsigned int regs[4];
+
+    cpuid(input, regs);
+
+    *(uint32_t *)(str + 0) = regs[1];
+    *(uint32_t *)(str + 4) = regs[3];
+    *(uint32_t *)(str + 8) = regs[2];
+    str[12] = '\0';
+}
+
 int main(int argc, char *argv[])
 {
-    int type = MCE_SRAO_MEM;
+    int type;
     int c, opt_index;
     uint32_t domid;
     xc_interface *xc_handle;
-    int cpu_nr;
-    int64_t gaddr, gpfn, mfn, haddr, max_gpa;
+    unsigned int cpu_nr;
+    uint64_t gaddr, gpfn, mfn, haddr, max_gpa;
+    char cpu_brand[13];
 
     /* Default Value */
     domid = DOMID_XEN;
     gaddr = 0x180020;
     cpu_nr = 0;
 
+    cpu_is_amd = cpu_is_intel = 0;
+    cpuid_brand_get(cpu_brand);
+    if (strstr(cpu_brand, "AMD"))
+        cpu_is_amd = 1;
+    else
+        cpu_is_intel = 1;
+
+    if (cpu_is_intel)
+        type = INTEL_MCE_SRAO_MEM;
+
     init_msr_inj();
     xc_handle = xc_interface_open(0, 0, 0);
     if ( !xc_handle ) {
@@ -571,8 +758,8 @@ int main(int argc, char *argv[])
         exit(EXIT_FAILURE);
     }
 
-    while ( 1 ) {
-        c = getopt_long(argc, argv, "c:Dd:t:hp:r", opts, &opt_index);
+    for (;;) {
+        c = getopt_long(argc, argv, "c:Dd:t:hp:r:e", opts, &opt_index);
         if ( c == -1 )
             break;
         switch ( c ) {
@@ -580,23 +767,26 @@ int main(int argc, char *argv[])
             dump=1;
             break;
         case 'c':
-            cpu_nr = strtol(optarg, &optarg, 10);
+            cpu_nr = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input a digit parameter for CPU\n");
             break;
         case 'd':
-            domid = strtol(optarg, &optarg, 10);
+            domid = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input a digit parameter for domain\n");
             break;
         case 'p':
-            gaddr = strtol(optarg, &optarg, 0);
+            gaddr = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input correct page address\n");
             break;
         case 't':
             type = strtol(optarg, NULL, 0);
             break;
+        case 'e':
+            opt_exception = 1;
+            break;
         case 'h':
         default:
             help();
@@ -627,16 +817,26 @@ int main(int argc, char *argv[])
         goto out;
     }
 
-    switch ( type )
-    {
-    case MCE_SRAO_MEM:
-        inject_mem_srao(xc_handle, cpu_nr, domid, gaddr);
+    switch ( type ) {
+    case INTEL_MCE_SRAO_MEM:
+        if ( cpu_is_intel )
+            intel_inject_mem_srao(xc_handle, cpu_nr, domid, gaddr);
         break;
-    case MCE_SRAO_LLC:
-        inject_llc_srao(xc_handle, cpu_nr, domid, gaddr);
+    case INTEL_MCE_SRAO_LLC:
+        if ( cpu_is_intel )
+            intel_inject_llc_srao(xc_handle, cpu_nr, domid, gaddr);
         break;
-    case CMCI_UCNA_LLC:
-        inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr);
+    case INTEL_CMCI_UCNA_LLC:
+        if ( cpu_is_intel )
+            intel_inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr);
+        break;
+    case AMD_MCE_MEM:
+        if ( cpu_is_amd )
+            amd_inject_mem(xc_handle, cpu_nr, domid, gaddr);
+        break;
+    case AMD_MCE_L3:
+        if ( cpu_is_amd )
+            amd_inject_l3(xc_handle, cpu_nr, domid, gaddr);
         break;
     default:
         err(xc_handle, "Unsupported error type\n");

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2012-11-12 16:25         ` Ian Campbell
@ 2012-11-13  1:40           ` Hao, Xudong
  2012-11-13  8:40             ` Jan Beulich
  0 siblings, 1 reply; 16+ messages in thread
From: Hao, Xudong @ 2012-11-13  1:40 UTC (permalink / raw)
  To: Ian Campbell, Christoph Egger
  Cc: Liu, Jinsong, Li, Haicheng, Keir (Xen.org), Jiang, Yunhong,
	Ian Jackson, xen-devel@lists.xen.org, Jan Beulich

> -----Original Message-----
> From: Ian Campbell [mailto:Ian.Campbell@citrix.com]
> Sent: Tuesday, November 13, 2012 12:26 AM
> To: Jan Beulich
> Cc: Ian Jackson; Christoph Egger; xen-devel@lists.xen.org; Keir (Xen.org); Liu,
> Jinsong; Liu, Jinsong; Jiang, Yunhong; Li, Haicheng; Hao, Xudong
> Subject: Re: [Xen-devel] [PATCH] tools/xen-mceinj: support AMD
> 
> On Fri, 2012-10-19 at 16:05 +0100, Jan Beulich wrote:
> > >>> On 19.10.12 at 17:01, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:
> > > Jan Beulich writes ("Re: [Xen-devel] [PATCH] tools/xen-mceinj: support
> AMD"):
> > >> >>> On 19.10.12 at 15:10, Christoph Egger <Christoph.Egger@amd.com>
> wrote:
> > >> > Ping?
> > >>
> > >> I'm afraid it's not really clear who should commit this - it's tools
> > >> side code, so IanJ or IanC would normally be the ones, but otoh
> > >> it's code requiring low level hardware knowledge to review the
> > >> patch, so both of them might want to rather not do the review.
> > >> In the past it was usually Keir who eventually committed such
> > >> patches, but I don't know whether he put this on his to-look-at-
> > >> and-eventually-commit list.
> > >
> > > My view is that I would like an ack from someone who understands
> > > what's going on ...
> >
> > Which would ideally be those who introduced the code, i.e.
> > Intel folks if I'm not mistaken...
> 
> Lets CC some of them then.
> 
> Intel folks -- any opinion on the patch below from Christoph?
> 

It's OK for me except for one comment. See below.
Of course I did not review the AMD only code.

> +    if (strstr(cpu_brand, "AMD"))
> +        cpu_is_amd = 1;
> +    else
> +        cpu_is_intel = 1;
> +
> +    if (cpu_is_intel)
> +        type = INTEL_MCE_SRAO_MEM;
> +

Isn't it necessary to set a default AMD type? As it stands, the "-t" parameter is required on AMD but optional on Intel; it would be better to give a unified usage for all users.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2012-11-13  1:40           ` Hao, Xudong
@ 2012-11-13  8:40             ` Jan Beulich
  0 siblings, 0 replies; 16+ messages in thread
From: Jan Beulich @ 2012-11-13  8:40 UTC (permalink / raw)
  To: Christoph Egger, Xudong Hao
  Cc: Haicheng Li, Jinsong Liu, Keir (Xen.org), Ian Campbell,
	Yunhong Jiang, Ian Jackson, xen-devel@lists.xen.org

>>> On 13.11.12 at 02:40, "Hao, Xudong" <xudong.hao@intel.com> wrote:
>>  -----Original Message-----
>> From: Ian Campbell [mailto:Ian.Campbell@citrix.com]
>> Sent: Tuesday, November 13, 2012 12:26 AM
>> To: Jan Beulich
>> Cc: Ian Jackson; Christoph Egger; xen-devel@lists.xen.org; Keir (Xen.org); 
> Liu,
>> Jinsong; Liu, Jinsong; Jiang, Yunhong; Li, Haicheng; Hao, Xudong
>> Subject: Re: [Xen-devel] [PATCH] tools/xen-mceinj: support AMD
>> 
>> On Fri, 2012-10-19 at 16:05 +0100, Jan Beulich wrote:
>> > >>> On 19.10.12 at 17:01, Ian Jackson <Ian.Jackson@eu.citrix.com> wrote:
>> > > Jan Beulich writes ("Re: [Xen-devel] [PATCH] tools/xen-mceinj: support
>> AMD"):
>> > >> >>> On 19.10.12 at 15:10, Christoph Egger <Christoph.Egger@amd.com>
>> wrote:
>> > >> > Ping?
>> > >>
>> > >> I'm afraid it's not really clear who should commit this - it's tools
>> > >> side code, so IanJ or IanC would normally be the ones, but otoh
>> > >> it's code requiring low level hardware knowledge to review the
>> > >> patch, so both of them might want to rather not do the review.
>> > >> In the past it was usually Keir who eventually committed such
>> > >> patches, but I don't know whether he put this on his to-look-at-
>> > >> and-eventually-commit list.
>> > >
>> > > My view is that I would like an ack from someone who understands
>> > > what's going on ...
>> >
>> > Which would ideally be those who introduced the code, i.e.
>> > Intel folks if I'm not mistaken...
>> 
>> Lets CC some of them then.
>> 
>> Intel folks -- any opinion on the patch below from Christoph?
>> 
> 
> It's ok for me except for one comments. See below.
> Of course I did not review the AMD only code.
> 
>> +    if (strstr(cpu_brand, "AMD"))
>> +        cpu_is_amd = 1;
>> +    else
>> +        cpu_is_intel = 1;
>> +
>> +    if (cpu_is_intel)
>> +        type = INTEL_MCE_SRAO_MEM;
>> +
> 
> Isn't necessary to set a default AMD type? The "-t" parameter is required 
> for amd but not always for Intel for users, it's better to give a unified 
> usage for all users.

Yes, I had pointed this out in an earlier reply already, as much as
I had asked (in the context of a matching hypervisor side patch)
about the reason for only dealing with bank 4 registers here.

Jan

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH] tools/xen-mceinj: support AMD
@ 2013-03-27 13:41 Egger Christoph
  2013-03-28 11:44 ` Jan Beulich
  0 siblings, 1 reply; 16+ messages in thread
From: Egger Christoph @ 2013-03-27 13:41 UTC (permalink / raw)
  To: xen-devel, Jan Beulich

[-- Attachment #1: Type: text/plain, Size: 336 bytes --]


Hi,

Initial patch sent out to xen-devel:
http://lists.xen.org/archives/html/xen-devel/2012-10/msg00567.html

Feedback from Jan Beulich:
http://lists.xen.org/archives/html/xen-devel/2012-10/msg02135.html

This patch integrates the feedback from Jan Beulich and does some
additional cleanup.

I ran the tests on Intel machines; they work as expected.

[-- Attachment #2: xen_mceinj.diff --]
[-- Type: text/plain, Size: 19076 bytes --]

commit dabb873d62c2c694aaeaa8345223bb272950abae
Author: Christoph Egger <chegger@amazon.de>
Date:   Wed Feb 27 14:52:19 2013 +0000

    xen-mceinj: Support AMD. Add -e option.
    
    Add support for AMD.
    Add -e option to raise an exception.
    
    Signed-off-by: Christoph Egger <chegger@amazon.de>

diff --git a/tools/tests/mce-test/tools/xen-mceinj.c b/tools/tests/mce-test/tools/xen-mceinj.c
index b69c65d..94fcb22 100644
--- a/tools/tests/mce-test/tools/xen-mceinj.c
+++ b/tools/tests/mce-test/tools/xen-mceinj.c
@@ -1,6 +1,8 @@
 /*
  * xen-mceinj.c: utilities to inject fake MCE for x86.
  * Copyright (c) 2010, Intel Corporation.
+ * Copyright (c) 2012, AMD Cooperation Inc.
+ * Copyright (c) 2013, Amazon.com, Inc. or its affiliates.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms and conditions of the GNU General Public License,
@@ -18,6 +20,7 @@
  * Authors: Yunhong Jiang <yunhong.jiang@intel.com>
  *          Haicheng Li <haicheng.li@intel.com>
  *          Xudong Hao <xudong.hao@intel.com>
+ *          Christoph Egger <chegger@amazon.de>
  */
 
 
@@ -44,11 +47,14 @@
 #define MCi_type_STATUS     0x1
 #define MCi_type_ADDR       0x2
 #define MCi_type_MISC       0x3
-#define MCi_type_CTL2       0x4
+#define MC4_type_MISC1      0x4
+#define MC4_type_MISC2      0x5
+#define MC4_type_MISC3      0x6
+#define MCi_type_CTL2       0x7
 
 #define INVALID_MSR         ~0UL
 
-/* Intel MSRs */
+/* X86 machine check MSRs */
 #define MSR_IA32_MCG_CAP         0x00000179
 #define MSR_IA32_MCG_STATUS      0x0000017a
 #define MSR_IA32_MCG_CTL         0x0000017b
@@ -56,35 +62,63 @@
 #define MSR_IA32_MC0_STATUS      0x00000401
 #define MSR_IA32_MC0_ADDR        0x00000402
 #define MSR_IA32_MC0_MISC        0x00000403
+
+/* Intel MSRs */
 #define MSR_IA32_MC0_CTL2        0x00000280
 
-/* LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */
+/* Intel: LLC (Last Level Cache) EWB (Explicit Write Back) SRAO MCE */
 #define MCG_STATUS_SRAO_LLC_VAL  0x5
 #define MCE_SRAO_LLC_BANK        0x7
 #define MCi_STATUS_SRAO_LLC_VAL  0xBD2000008000017AUL
 #define MCi_MISC_SRAO_LLC_VAL    0x86UL
 
-/* Memory Patrol Scrub SRAO MCE */
+/* Intel: Memory Patrol Scrub SRAO MCE */
 #define MCG_STATUS_SRAO_MEM_VAL  0x5
 #define MCE_SRAO_MEM_BANK        0x8
 #define MCi_STATUS_SRAO_MEM_VAL  0xBD000000004000CFUL
 #define MCi_MISC_SRAO_MEM_VAL    0x86UL
 
-/* LLC EWB UCNA Error */
+/* Intel: LLC EWB UCNA Error */
 #define MCG_STATUS_UCNA_LLC_VAL  0x0
 #define CMCI_UCNA_LLC_BANK       0x9
 #define MCi_STATUS_UCNA_LLC_VAL  0xBC20000080000136UL
 #define MCi_MISC_UCNA_LLC_VAL    0x86UL
 
-/* Error Types */
-#define MCE_SRAO_MEM        0x0
-#define MCE_SRAO_LLC        0x1
-#define CMCI_UCNA_LLC       0x2
+/* Intel: Error Types */
+#define INTEL_MCE_SRAO_MEM        0x0
+#define INTEL_MCE_SRAO_LLC        0x1
+#define INTEL_CMCI_UCNA_LLC       0x2
+
+/* AMD: Memory Error */
+#define MCG_STATUS_MEM_VAL        0x5
+#define MCE_MEM_BANK              0x4
+#define MCi_STATUS_MEM_VAL        0xb4000000001c0100UL
+//#define MCi_STATUS_MEM_VAL        0xb600000000000100UL
+#define MCi_MISC_MEM_VAL          0x0
+
+/* AMD: L3 Cache Error */
+#define MCG_STATUS_L3_VAL         0x5
+#define MCE_L3_BANK               0x4
+#define MCi_STATUS_L3_VAL         0xbc000400001c010bULL
+#define MC4_MISC0_VAL             0x0
+#define MC4_MISC1_VAL             0x0
+#define MC4_MISC2_L3_VAL          0xc008000000000003ULL
+
+/* AMD: Error Types */
+#define AMD_MCE_MEM               0x0 /* memory error */
+#define AMD_MCE_L3                0x1 /* l3 cache */
 
 #define LOGFILE stdout
 
-int dump;
-struct xen_mc_msrinject msr_inj;
+static int dump;
+static int opt_exception;
+static struct xen_mc_msrinject msr_inj;
+
+#define CPU_VENDOR_UNKNOWN  -1
+#define CPU_VENDOR_AMD       0
+#define CPU_VENDOR_INTEL     1
+static int cpu_vendor;
+
 
 static void Lprintf(const char *fmt, ...)
 {
@@ -145,7 +179,7 @@ static int mca_cpuinfo(xc_interface *xc_handle)
         return 0;
 }
 
-static int inject_cmci(xc_interface *xc_handle, int cpu_nr)
+static int intel_inject_cmci(xc_interface *xc_handle)
 {
     struct xen_mc mc;
     int nr_cpus;
@@ -191,6 +225,15 @@ static uint64_t bank_addr(int bank, int type)
         case MCi_type_MISC:
             addr = MSR_IA32_MC0_CTL + (bank * 4) + type;
             break;
+        case MC4_type_MISC1:
+            addr = 0xc0000408;
+            break;
+        case MC4_type_MISC2:
+            addr = 0xc0000409;
+            break;
+        case MC4_type_MISC3:
+            addr = 0xc000040a;
+            break;
         case MCi_type_CTL2:
             addr = MSR_IA32_MC0_CTL2 + bank;
             break;
@@ -356,12 +399,11 @@ static int inject_mci_status(xc_interface *xc_handle,
 }
 
 static int inject_mci_misc(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint64_t bank,
-                             uint64_t val)
+                             uint32_t cpu_nr, uint32_t misctype,
+                             uint64_t bank, uint64_t val)
 {
     return add_msr_bank_intpose(xc_handle, cpu_nr, MC_MSRINJ_F_INTERPOSE,
-                                    MCi_type_MISC, bank, val); 
+                                    MCi_type_MISC + misctype, bank, val); 
 }
 
 static int inject_mci_addr(xc_interface *xc_handle,
@@ -373,10 +415,8 @@ static int inject_mci_addr(xc_interface *xc_handle,
                                     MCi_type_ADDR, bank, val); 
 }
 
-static int inject_llc_srao(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_llc_srao(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -390,7 +430,7 @@ static int inject_llc_srao(xc_interface *xc_handle,
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           MCE_SRAO_LLC_BANK, MCi_MISC_SRAO_LLC_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -407,17 +447,18 @@ static int inject_llc_srao(xc_interface *xc_handle,
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_mce(xc_handle, cpu_nr);
-    if ( ret )
-        err(xc_handle, "Failed to inject MCE error\n");
+
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
 
     return 0;
 }
 
-static int inject_mem_srao(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_mem_srao(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -431,7 +472,7 @@ static int inject_mem_srao(xc_interface *xc_handle,
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           MCE_SRAO_MEM_BANK, MCi_MISC_SRAO_MEM_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -448,17 +489,18 @@ static int inject_mem_srao(xc_interface *xc_handle,
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_mce(xc_handle, cpu_nr);
-    if ( ret )
-        err(xc_handle, "Failed to inject MCE error\n");
+
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
 
     return 0;
 }
 
-static int inject_llc_ucna(xc_interface *xc_handle,
-                             uint32_t cpu_nr,
-                             uint32_t domain,
-                             uint64_t gaddr)
+static int intel_inject_llc_ucna(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
 {
     uint64_t gpfn, mfn, haddr;
     int ret = 0;
@@ -472,7 +514,7 @@ static int inject_llc_ucna(xc_interface *xc_handle,
     if ( ret )
         err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
 
-    ret = inject_mci_misc(xc_handle, cpu_nr,
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
                           CMCI_UCNA_LLC_BANK, MCi_MISC_UCNA_LLC_VAL);
     if ( ret )
         err(xc_handle, "Failed to inject MCi_MISC MSR\n");
@@ -489,13 +531,108 @@ static int inject_llc_ucna(xc_interface *xc_handle,
     ret = flush_msr_inj(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MSR\n");
-    ret = inject_cmci(xc_handle, cpu_nr);
+    ret = intel_inject_cmci(xc_handle);
     if ( ret )
         err(xc_handle, "Failed to inject MCE error\n");
 
     return 0;
 }
 
+static int amd_inject_mem(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
+{
+    uint64_t gpfn, mfn, haddr;
+    int ret = 0;
+
+    ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCG_STATUS MSR\n");
+
+    ret = inject_mci_status(xc_handle, cpu_nr,
+                            MCE_MEM_BANK, MCi_STATUS_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
+                          MCE_MEM_BANK, MCi_MISC_MEM_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_MISC MSR\n");
+
+    gpfn = gaddr >> PAGE_SHIFT;
+    mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn);
+    if (!mfn_valid(mfn))
+        err(xc_handle, "The MFN is not valid\n");
+    haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1));
+    ret = inject_mci_addr(xc_handle, cpu_nr, MCE_MEM_BANK, haddr);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_ADDR MSR\n");
+
+    ret = flush_msr_inj(xc_handle);
+    if ( ret )
+        err(xc_handle, "Failed to inject MSR\n");
+
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
+
+    return 0;
+}
+
+static int amd_inject_l3(xc_interface *xc_handle,
+    uint32_t cpu_nr, uint32_t domain, uint64_t gaddr)
+{
+    uint64_t gpfn, mfn, haddr;
+    int ret = 0;
+
+    ret = inject_mcg_status(xc_handle, cpu_nr, MCG_STATUS_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCG_STATUS MSR\n");
+
+    ret = inject_mci_status(xc_handle, cpu_nr,
+                            MCE_L3_BANK, MCi_STATUS_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_STATUS MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 0,
+                          MCE_L3_BANK, MC4_MISC0_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC0 MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 1,
+                          MCE_L3_BANK, MC4_MISC1_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC1 MSR\n");
+
+    ret = inject_mci_misc(xc_handle, cpu_nr, 2,
+                          MCE_L3_BANK, MC4_MISC2_L3_VAL);
+    if ( ret )
+        err(xc_handle, "Failed to inject MC4_MISC2 MSR\n");
+
+    gpfn = gaddr >> PAGE_SHIFT;
+    mfn = mca_gpfn_to_mfn(xc_handle, domain, gpfn);
+    if (!mfn_valid(mfn))
+        err(xc_handle, "The MFN is not valid\n");
+    haddr = (mfn << PAGE_SHIFT) | (gaddr & (PAGE_SIZE - 1));
+    ret = inject_mci_addr(xc_handle, cpu_nr, MCE_L3_BANK, haddr);
+    if ( ret )
+        err(xc_handle, "Failed to inject MCi_ADDR MSR\n");
+
+    ret = flush_msr_inj(xc_handle);
+    if ( ret )
+        err(xc_handle, "Failed to inject MSR\n");
+
+    if (opt_exception) {
+        ret = inject_mce(xc_handle, cpu_nr);
+        if ( ret )
+            err(xc_handle, "Failed to inject MCE error\n");
+    }
+
+    return 0;
+}
+
+
 static long xs_get_dom_mem(int domid)
 {
     char path[128];
@@ -508,7 +645,7 @@ static long xs_get_dom_mem(int domid)
     if (!xs)
         return -1;
 
-    sprintf(path, "/local/domain/%d/memory/target", domid);
+    snprintf(path, sizeof(path), "/local/domain/%d/memory/target", domid);
     memstr = xs_read(xs, XBT_NULL, path, &plen);
     xs_daemon_close(xs);
 
@@ -540,30 +677,101 @@ static void help(void)
            "  -D, --dump           dump addr info without error injection\n"
            "  -c, --cpu=CPU_ID     target CPU\n"
            "  -d, --domain=DomID   target domain, the default is Xen itself\n"
+           "  -e                   raise MCE exception\n"
            "  -h, --help           print this page\n"
            "  -p, --phyaddr        physical address\n"
-           "  -t, --type=error     error type\n"
-           "                        0 : MCE_SRAO_MEM\n"
-           "                        1 : MCE_SRAO_LLC\n"
-           "                        2 : CMCI_UCNA_LLC\n"
-           "\n"
            );
+
+    if (cpu_vendor == CPU_VENDOR_INTEL) {
+           printf(
+           "  -t, --type=error     error type\n"
+           "                        0x0 : SRAO MEM\n"
+           "                        0x1 : SRAO LLC\n"
+           "                        0x2 : CMCI UCNA LLC\n");
+     }
+     if (cpu_vendor == CPU_VENDOR_AMD) {
+           printf(
+           "  -t, --type=error     error type\n"
+           "                        0x0: DRAM error\n"
+           "                        0x1: L3 cache error\n");
+     }
+     printf("\n");
+}
+
+static void cpuid(const unsigned int *input, unsigned int regs[4])
+{
+    unsigned int count = (input[1] == XEN_CPUID_INPUT_UNUSED) ? 0 : input[1];
+#ifdef __i386__
+    /* Use the stack to avoid reg constraint failures with some gcc flags */
+    asm (
+        "push %%ebx; push %%edx\n\t"
+        "cpuid\n\t"
+        "mov %%ebx,4(%4)\n\t"
+        "mov %%edx,12(%4)\n\t"
+        "pop %%edx; pop %%ebx\n\t"
+        : "=a" (regs[0]), "=c" (regs[2])
+        : "0" (input[0]), "1" (count), "S" (regs)
+        : "memory" );
+#else
+    asm (
+        "cpuid"
+        : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+        : "0" (input[0]), "2" (count) );
+#endif
+}
+
+/* Get the CPUID vendor ID string (leaf 0) of the host processor. */
+static void cpuid_brand_get(char *str, size_t len)
+{
+    unsigned int input[2] = { 0, 0 };
+    union {
+        unsigned int regs[4];
+        struct {
+            char eax[4];
+            char ebx[4];
+            char ecx[4];
+            char edx[4];
+        } str_regs;
+    } cpu_branding;
+
+    cpuid(input, cpu_branding.regs);
+
+    snprintf(str, len, "%c%c%c%c%c%c%c%c%c%c%c%c",
+        cpu_branding.str_regs.ebx[0], cpu_branding.str_regs.ebx[1],
+        cpu_branding.str_regs.ebx[2], cpu_branding.str_regs.ebx[3],
+        cpu_branding.str_regs.edx[0], cpu_branding.str_regs.edx[1],
+        cpu_branding.str_regs.edx[2], cpu_branding.str_regs.edx[3],
+        cpu_branding.str_regs.ecx[0], cpu_branding.str_regs.ecx[1],
+        cpu_branding.str_regs.ecx[2], cpu_branding.str_regs.ecx[3]);
 }
 
 int main(int argc, char *argv[])
 {
-    int type = MCE_SRAO_MEM;
+    int type;
     int c, opt_index;
     uint32_t domid;
     xc_interface *xc_handle;
-    int cpu_nr;
-    int64_t gaddr, gpfn, mfn, haddr, max_gpa;
+    unsigned int cpu_nr;
+    uint64_t gaddr, gpfn, mfn, haddr, max_gpa;
+    char cpu_brand[13];
 
     /* Default Value */
     domid = DOMID_XEN;
     gaddr = 0x180020;
     cpu_nr = 0;
 
+    cpu_vendor = CPU_VENDOR_UNKNOWN;
+    cpuid_brand_get(cpu_brand, sizeof(cpu_brand));
+    if (strstr(cpu_brand, "AMD"))
+        cpu_vendor = CPU_VENDOR_AMD;
+    if (strstr(cpu_brand, "Intel"))
+        cpu_vendor = CPU_VENDOR_INTEL;
+
+    if (cpu_vendor == CPU_VENDOR_AMD)
+        type = AMD_MCE_MEM;
+    if (cpu_vendor == CPU_VENDOR_INTEL)
+        type = INTEL_MCE_SRAO_MEM;
+
     init_msr_inj();
     xc_handle = xc_interface_open(0, 0, 0);
     if ( !xc_handle ) {
@@ -571,8 +779,8 @@ int main(int argc, char *argv[])
         exit(EXIT_FAILURE);
     }
 
-    while ( 1 ) {
-        c = getopt_long(argc, argv, "c:Dd:t:hp:r", opts, &opt_index);
+    for (;;) {
+        c = getopt_long(argc, argv, "c:Dd:t:hp:r:e", opts, &opt_index);
         if ( c == -1 )
             break;
         switch ( c ) {
@@ -580,23 +788,26 @@ int main(int argc, char *argv[])
             dump=1;
             break;
         case 'c':
-            cpu_nr = strtol(optarg, &optarg, 10);
+            cpu_nr = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input a digit parameter for CPU\n");
             break;
         case 'd':
-            domid = strtol(optarg, &optarg, 10);
+            domid = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input a digit parameter for domain\n");
             break;
         case 'p':
-            gaddr = strtol(optarg, &optarg, 0);
+            gaddr = strtoul(optarg, &optarg, 0);
             if ( strlen(optarg) != 0 )
                 err(xc_handle, "Please input correct page address\n");
             break;
         case 't':
             type = strtol(optarg, NULL, 0);
             break;
+        case 'e':
+            opt_exception = 1;
+            break;
         case 'h':
         default:
             help();
@@ -627,19 +838,36 @@ int main(int argc, char *argv[])
         goto out;
     }
 
-    switch ( type )
-    {
-    case MCE_SRAO_MEM:
-        inject_mem_srao(xc_handle, cpu_nr, domid, gaddr);
-        break;
-    case MCE_SRAO_LLC:
-        inject_llc_srao(xc_handle, cpu_nr, domid, gaddr);
-        break;
-    case CMCI_UCNA_LLC:
-        inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr);
+    switch ( cpu_vendor ) {
+    case CPU_VENDOR_INTEL:
+        switch ( type ) {
+        case INTEL_MCE_SRAO_MEM:
+            intel_inject_mem_srao(xc_handle, cpu_nr, domid, gaddr);
+            break;
+        case INTEL_MCE_SRAO_LLC:
+            intel_inject_llc_srao(xc_handle, cpu_nr, domid, gaddr);
+            break;
+        case INTEL_CMCI_UCNA_LLC:
+            intel_inject_llc_ucna(xc_handle, cpu_nr, domid, gaddr);
+            break;
+        default:
+            err(xc_handle, "Unsupported error type\n");
+            break;
+        }
         break;
-    default:
-        err(xc_handle, "Unsupported error type\n");
+
+    case CPU_VENDOR_AMD:
+        switch ( type ) {
+        case AMD_MCE_MEM:
+            amd_inject_mem(xc_handle, cpu_nr, domid, gaddr);
+            break;
+        case AMD_MCE_L3:
+            amd_inject_l3(xc_handle, cpu_nr, domid, gaddr);
+            break;
+        default:
+            err(xc_handle, "Unsupported error type\n");
+            break;
+        }
         break;
     }
 

[-- Attachment #3: Type: text/plain, Size: 126 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel

^ permalink raw reply related	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2013-03-27 13:41 [PATCH] tools/xen-mceinj: support AMD Egger Christoph
@ 2013-03-28 11:44 ` Jan Beulich
  2013-03-28 13:23   ` Christoph Egger
  0 siblings, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2013-03-28 11:44 UTC (permalink / raw)
  To: Egger Christoph; +Cc: xen-devel

>>> On 27.03.13 at 14:41, Egger Christoph <chegger@amazon.de> wrote:

Didn't you also require a hypervisor side change for

>+#define MC4_type_MISC1      0x4
>+#define MC4_type_MISC2      0x5
>+#define MC4_type_MISC3      0x6

which also gets me back to the previously asked question why
this is done only for bank 4.

>-    sprintf(path, "/local/domain/%d/memory/target", domid);
>+    snprintf(path, sizeof(path), "/local/domain/%d/memory/target", domid);

This continues to be valid, but unrelated.

>-    int type = MCE_SRAO_MEM;
>+    int type;
>...
>+    if (cpu_vendor == CPU_VENDOR_AMD)
>+        type = AMD_MCE_MEM;
>+    if (cpu_vendor == CPU_VENDOR_INTEL)
>+        type = INTEL_MCE_SRAO_MEM;

still leaves type uninitialized for the non-Intel, non-AMD case. And
some compilers aren't going to be able to figure out that the
variable only gets used for either of these two cases, and will raise
a warning.

Jan

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2013-03-28 11:44 ` Jan Beulich
@ 2013-03-28 13:23   ` Christoph Egger
  2013-03-28 13:58     ` Jan Beulich
  2013-05-30 14:29     ` Christoph Egger
  0 siblings, 2 replies; 16+ messages in thread
From: Christoph Egger @ 2013-03-28 13:23 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 28.03.13 12:44, Jan Beulich wrote:
>>>> On 27.03.13 at 14:41, Egger Christoph <chegger@amazon.de> wrote:
>
> Didn't you also require a hypervisor side change for
>
>> +#define MC4_type_MISC1      0x4
>> +#define MC4_type_MISC2      0x5
>> +#define MC4_type_MISC3      0x6
>
> which also gets me back to the previously asked question why
> this is done only for bank 4.

These MSRs only exist on bank 4.

>> -    sprintf(path, "/local/domain/%d/memory/target", domid);
>> +    snprintf(path, sizeof(path), "/local/domain/%d/memory/target", domid);
>
> This continues to be valid, but unrelated.
>
>> -    int type = MCE_SRAO_MEM;
>> +    int type;
>> ...
>> +    if (cpu_vendor == CPU_VENDOR_AMD)
>> +        type = AMD_MCE_MEM;
>> +    if (cpu_vendor == CPU_VENDOR_INTEL)
>> +        type = INTEL_MCE_SRAO_MEM;
>
> still leaves type uninitialized for the non-Intel, non-AMD case. And
> some compilers aren't going to be able to figure out that the
> variable only gets used for either of these two cases, and will raise
> a warning.

I haven't seen any warning but ok, will fix.

Christoph

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2013-03-28 13:23   ` Christoph Egger
@ 2013-03-28 13:58     ` Jan Beulich
  2013-03-28 14:11       ` Christoph Egger
  2013-05-30 14:29     ` Christoph Egger
  1 sibling, 1 reply; 16+ messages in thread
From: Jan Beulich @ 2013-03-28 13:58 UTC (permalink / raw)
  To: Christoph Egger; +Cc: xen-devel

>>> On 28.03.13 at 14:23, Christoph Egger <chegger@amazon.de> wrote:
> On 28.03.13 12:44, Jan Beulich wrote:
>>>>> On 27.03.13 at 14:41, Egger Christoph <chegger@amazon.de> wrote:
>>
>> Didn't you also require a hypervisor side change for

You didn't answer this one.

>>> +#define MC4_type_MISC1      0x4
>>> +#define MC4_type_MISC2      0x5
>>> +#define MC4_type_MISC3      0x6
>>
>> which also gets me back to the previously asked question why
>> this is done only for bank 4.
> 
> These MSRs only exist on bank 4.

Sure, but it still looks pretty arbitrary. Anyway, the comment
wasn't meant to be a NAK of any kind.

Jan

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2013-03-28 13:58     ` Jan Beulich
@ 2013-03-28 14:11       ` Christoph Egger
  2013-04-11  7:53         ` Christoph Egger
  0 siblings, 1 reply; 16+ messages in thread
From: Christoph Egger @ 2013-03-28 14:11 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 28.03.13 14:58, Jan Beulich wrote:
>>>> On 28.03.13 at 14:23, Christoph Egger <chegger@amazon.de> wrote:
>> On 28.03.13 12:44, Jan Beulich wrote:
>>>>>> On 27.03.13 at 14:41, Egger Christoph <chegger@amazon.de> wrote:
>>>
>>> Didn't you also require a hypervisor side change for
>
> You didn't answer this one.

Yes, this is right for AMD but not for Intel.
I will submit it once I got the permission.

Christoph

>>>> +#define MC4_type_MISC1      0x4
>>>> +#define MC4_type_MISC2      0x5
>>>> +#define MC4_type_MISC3      0x6
>>>
>>> which also gets me back to the previously asked question why
>>> this is done only for bank 4.
>>
>> These MSRs only exist on bank 4.
>
> Sure, but it still looks pretty arbitrary. Anyway, the comment
> wasn't meant to be a NAK of any kind.
>
> Jan
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2013-03-28 14:11       ` Christoph Egger
@ 2013-04-11  7:53         ` Christoph Egger
  0 siblings, 0 replies; 16+ messages in thread
From: Christoph Egger @ 2013-04-11  7:53 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 28.03.13 15:11, Christoph Egger wrote:
> On 28.03.13 14:58, Jan Beulich wrote:
>>>>> On 28.03.13 at 14:23, Christoph Egger <chegger@amazon.de> wrote:
>>> On 28.03.13 12:44, Jan Beulich wrote:
>>>>>>> On 27.03.13 at 14:41, Egger Christoph <chegger@amazon.de> wrote:
>>>>
>>>> Didn't you also require a hypervisor side change for
>>
>> You didn't answer this one.
> 
> Yes, this is right for AMD but not for Intel.

Jan, the patch is here:
http://lists.xen.org/archives/html/xen-devel/2012-10/msg02010.html
Please go ahead.

Christoph

> 
> Christoph
> 
>>>>> +#define MC4_type_MISC1      0x4
>>>>> +#define MC4_type_MISC2      0x5
>>>>> +#define MC4_type_MISC3      0x6
>>>>
>>>> which also gets me back to the previously asked question why
>>>> this is done only for bank 4.
>>>
>>> These MSRs only exist on bank 4.
>>
>> Sure, but it still looks pretty arbitrary. Anyway, the comment
>> wasn't meant to be a NAK of any kind.
>>
>> Jan
>>
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH] tools/xen-mceinj: support AMD
  2013-03-28 13:23   ` Christoph Egger
  2013-03-28 13:58     ` Jan Beulich
@ 2013-05-30 14:29     ` Christoph Egger
  1 sibling, 0 replies; 16+ messages in thread
From: Christoph Egger @ 2013-05-30 14:29 UTC (permalink / raw)
  To: Jan Beulich; +Cc: xen-devel

On 28.03.13 14:23, Christoph Egger wrote:
> On 28.03.13 12:44, Jan Beulich wrote:
>>>>> On 27.03.13 at 14:41, Egger Christoph <chegger@amazon.de> wrote:
>>
>> Didn't you also require a hypervisor side change for
>>
>>> +#define MC4_type_MISC1      0x4
>>> +#define MC4_type_MISC2      0x5
>>> +#define MC4_type_MISC3      0x6
>>
>> which also gets me back to the previously asked question why
>> this is done only for bank 4.
> 
> These MSRs only exist on bank 4.
> 
>>> -    sprintf(path, "/local/domain/%d/memory/target", domid);
>>> +    snprintf(path, sizeof(path), "/local/domain/%d/memory/target",
>>> domid);
>>
>> This continues to be valid, but unrelated.
>>
>>> -    int type = MCE_SRAO_MEM;
>>> +    int type;
>>> ...
>>> +    if (cpu_vendor == CPU_VENDOR_AMD)
>>> +        type = AMD_MCE_MEM;
>>> +    if (cpu_vendor == CPU_VENDOR_INTEL)
>>> +        type = INTEL_MCE_SRAO_MEM;
>>
>> still leaves type uninitialized for the non-Intel, non-AMD case. And
>> some compilers aren't going to be able to figure out that the
>> variable only gets used for either of these two cases, and will raise
>> a warning.
> 
> I haven't seen any warning but ok, will fix.

I am sending a new version with this fixed.

Christoph

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2013-05-30 14:29 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-27 13:41 [PATCH] tools/xen-mceinj: support AMD Egger Christoph
2013-03-28 11:44 ` Jan Beulich
2013-03-28 13:23   ` Christoph Egger
2013-03-28 13:58     ` Jan Beulich
2013-03-28 14:11       ` Christoph Egger
2013-04-11  7:53         ` Christoph Egger
2013-05-30 14:29     ` Christoph Egger
  -- strict thread matches above, loose matches on Subject: below --
2012-10-05 14:07 Christoph Egger
2012-10-19 13:10 ` Christoph Egger
2012-10-19 14:58   ` Jan Beulich
2012-10-19 15:01     ` Ian Jackson
2012-10-19 15:05       ` Jan Beulich
2012-11-12 16:25         ` Ian Campbell
2012-11-13  1:40           ` Hao, Xudong
2012-11-13  8:40             ` Jan Beulich
2012-10-29 10:20 ` Jan Beulich

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).