qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [Patch V1 1/2] x86, mce: Basic support to add LMCE support to QEMU
@ 2015-12-10 16:38 Ashok Raj
  2015-12-10 16:38 ` [Qemu-devel] [Patch V0 2/2] x86, mce: Need to translate GPA to HPA to inject error in guest Ashok Raj
  0 siblings, 1 reply; 2+ messages in thread
From: Ashok Raj @ 2015-12-10 16:38 UTC (permalink / raw)
  To: qemu-devel
  Cc: Tony Luck, Ashok Raj, Gleb Natapov, Gong Chen, Andi Kleen,
	Paolo Bonzini, Boris Petkov

This patch adds the basic enumeration and control MSRs required to support
Local Machine Check Exception (LMCE).

- Added Local Machine Check definitions, changed MCG_CAP
- Added support for IA32_FEATURE_CONTROL.
- When delivering MCE to guest, we deliver to just a single CPU
  when guest OS has opted in to Local delivery.

Also tested: 
- Legacy guest that doesn't support recovery would panic.
- Platform that supports recovery but not LMCE: KVM can emulate LMCE for the
  guest. The platform MCE is broadcast, but we inject the MCE only into the
  thread that encountered the fault.

Reviewed-by: Andi Kleen <andi.kleen@intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Tested-by: Gong Chen <gong.chen@intel.com>
---
V2 patchset: 
Removed a line based on Paolo's suggestion. Gong verified the new
patch works. 

 
 target-i386/cpu.c |  8 ++++++++
 target-i386/cpu.h |  8 ++++++--
 target-i386/kvm.c | 38 +++++++++++++++++++++++++++++++-------
 3 files changed, 45 insertions(+), 9 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 11e5e39..167669a 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2737,6 +2737,13 @@ static void mce_init(X86CPU *cpu)
     }
 }
 
+/* Set the initial value of the emulated IA32_FEATURE_CONTROL MSR. */
+static void feature_control_init(X86CPU *cpu)
+{
+    /* Presumably bit 20 = LMCE_ON and bit 0 = lock; confirm against the SDM. */
+    cpu->env.msr_ia32_feature_control = (1ULL << 20) | (1ULL << 0);
+}
+
 #ifndef CONFIG_USER_ONLY
 static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
 {
@@ -2858,6 +2865,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
 #endif
 
     mce_init(cpu);
+    feature_control_init(cpu);
 
 #ifndef CONFIG_USER_ONLY
     if (tcg_enabled()) {
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 84edfd0..a567d7a 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -282,8 +282,9 @@
 
 #define MCG_CTL_P       (1ULL<<8)   /* MCG_CAP register available */
 #define MCG_SER_P       (1ULL<<24) /* MCA recovery/new status bits */
+#define MCG_LMCE_P	(1ULL<<27) /* Local Machine Check Supported */
 
-#define MCE_CAP_DEF     (MCG_CTL_P|MCG_SER_P)
+#define MCE_CAP_DEF     (MCG_CTL_P|MCG_SER_P|MCG_LMCE_P)
 #define MCE_BANKS_DEF   10
 
 #define MCG_CAP_BANKS_MASK 0xff
@@ -291,6 +292,7 @@
 #define MCG_STATUS_RIPV (1ULL<<0)   /* restart ip valid */
 #define MCG_STATUS_EIPV (1ULL<<1)   /* ip points to correct instruction */
 #define MCG_STATUS_MCIP (1ULL<<2)   /* machine check in progress */
+#define MCG_STATUS_LMCE (1ULL<<3)   /* Local MCE signaled */
 
 #define MCI_STATUS_VAL   (1ULL<<63)  /* valid error */
 #define MCI_STATUS_OVER  (1ULL<<62)  /* previous errors lost */
@@ -333,6 +335,7 @@
 #define MSR_MCG_CAP                     0x179
 #define MSR_MCG_STATUS                  0x17a
 #define MSR_MCG_CTL                     0x17b
+#define MSR_MCG_EXT_CTL			0x4d0
 
 #define MSR_P6_EVNTSEL0                 0x186
 
@@ -892,7 +895,6 @@ typedef struct CPUX86State {
 
     uint64_t mcg_status;
     uint64_t msr_ia32_misc_enable;
-    uint64_t msr_ia32_feature_control;
 
     uint64_t msr_fixed_ctr_ctrl;
     uint64_t msr_global_ctrl;
@@ -977,8 +979,10 @@ typedef struct CPUX86State {
     int64_t tsc_khz;
     void *kvm_xsave_buf;
 
+    uint64_t msr_ia32_feature_control;
     uint64_t mcg_cap;
     uint64_t mcg_ctl;
+    uint64_t mcg_ext_ctl;
     uint64_t mce_banks[MCE_BANKS_DEF*4];
 
     uint64_t tsc_aux;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 6dc9846..c61fe1f 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -72,6 +72,7 @@ static bool has_msr_tsc_aux;
 static bool has_msr_tsc_adjust;
 static bool has_msr_tsc_deadline;
 static bool has_msr_feature_control;
+static bool has_msr_ext_mcg_ctl;
 static bool has_msr_async_pf_en;
 static bool has_msr_pv_eoi_en;
 static bool has_msr_misc_enable;
@@ -370,18 +371,30 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
     uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
                       MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
     uint64_t mcg_status = MCG_STATUS_MCIP;
+    int flags = 0;
+    CPUState *cs = CPU(cpu);
+
+    /*
+     * We need to read back the value of MSR_EXT_MCG_CTL that was set by the
+     * guest kernel back into Qemu
+     */
+    cpu_synchronize_state(cs);
+
+    flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0;
 
     if (code == BUS_MCEERR_AR) {
-        status |= MCI_STATUS_AR | 0x134;
-        mcg_status |= MCG_STATUS_EIPV;
+	status |= MCI_STATUS_AR | 0x134;
+	mcg_status |= MCG_STATUS_EIPV;
+	if (env->mcg_ext_ctl & 0x1) {
+		mcg_status |= MCG_STATUS_LMCE;
+		flags = 0; /* No Broadcast when LMCE is opted by guest */
+	}
     } else {
         status |= 0xc0;
         mcg_status |= MCG_STATUS_RIPV;
     }
     cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr,
-                       (MCM_ADDR_PHYS << 6) | 0xc,
-                       cpu_x86_support_mca_broadcast(env) ?
-                       MCE_INJECT_BROADCAST : 0);
+		       (MCM_ADDR_PHYS << 6) | 0xc, flags);
 }
 
 static void hardware_memory_error(void)
@@ -808,10 +821,14 @@ int kvm_arch_init_vcpu(CPUState *cs)
 
     c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
     if (c) {
-        has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) ||
-                                  !!(c->ecx & CPUID_EXT_SMX);
+        has_msr_feature_control = !!((c->ecx & CPUID_EXT_VMX) ||
+                                  !!(c->ecx & CPUID_EXT_SMX) || 
+				  !!(env->mcg_cap & MCG_LMCE_P));
     }
 
+    if (has_msr_feature_control && (env->mcg_cap & MCG_LMCE_P))
+        has_msr_ext_mcg_ctl = true;
+
     c = cpuid_find_entry(&cpuid_data.cpuid, 0x80000007, 0);
     if (c && (c->edx & 1<<8) && invtsc_mig_blocker == NULL) {
         /* for migration */
@@ -1557,6 +1574,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
 
         kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
         kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
+	kvm_msr_entry_set(&msrs[n++], MSR_MCG_EXT_CTL, env->mcg_ext_ctl);
         for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
             kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
         }
@@ -1811,6 +1829,9 @@ static int kvm_get_msrs(X86CPU *cpu)
     if (has_msr_feature_control) {
         msrs[n++].index = MSR_IA32_FEATURE_CONTROL;
     }
+    if (has_msr_ext_mcg_ctl) {
+    	msrs[n++].index = MSR_MCG_EXT_CTL;
+    }
     if (has_msr_bndcfgs) {
         msrs[n++].index = MSR_IA32_BNDCFGS;
     }
@@ -1981,6 +2002,9 @@ static int kvm_get_msrs(X86CPU *cpu)
         case MSR_IA32_FEATURE_CONTROL:
             env->msr_ia32_feature_control = msrs[i].data;
             break;
+	case MSR_MCG_EXT_CTL:
+	    env->mcg_ext_ctl = msrs[i].data;
+	    break;
         case MSR_IA32_BNDCFGS:
             env->msr_bndcfgs = msrs[i].data;
             break;
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [Qemu-devel] [Patch V0 2/2] x86, mce: Need to translate GPA to HPA to inject error in guest.
  2015-12-10 16:38 [Qemu-devel] [Patch V1 1/2] x86, mce: Basic support to add LMCE support to QEMU Ashok Raj
@ 2015-12-10 16:38 ` Ashok Raj
  0 siblings, 0 replies; 2+ messages in thread
From: Ashok Raj @ 2015-12-10 16:38 UTC (permalink / raw)
  To: qemu-devel
  Cc: Tony Luck, root, Ashok Raj, Gleb Natapov, Gong Chen, Andi Kleen,
	Paolo Bonzini, Boris Petkov

From: root <root@lmcesrv.bj.intel.com>

When we need to test error injection to a specific address using EINJ,
there needs to be a way to translate GPA to HPA. This allows host EINJ to
inject an error so we can test how the guest behaves when a bad address is
consumed, and permits the guest OS to perform its own recovery.

Signed-off-by: Gong Chen <gong.chen@intel.com>
---
 hmp-commands.hx       | 14 ++++++++++++++
 include/exec/memory.h |  2 ++
 kvm-all.c             | 24 ++++++++++++++++++++++++
 memory.c              | 13 +++++++++++++
 monitor.c             | 16 ++++++++++++++++
 5 files changed, 69 insertions(+)
 mode change 100644 => 100755 include/exec/memory.h
 mode change 100644 => 100755 kvm-all.c
 mode change 100644 => 100755 memory.c
 mode change 100644 => 100755 monitor.c

diff --git a/hmp-commands.hx b/hmp-commands.hx
index bb52e4d..673c00e 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -444,6 +444,20 @@ Start gdbserver session (default @var{port}=1234)
 ETEXI
 
     {
+	 .name         = "x-gpa2hva",
+	 .args_type    = "fmt:/,addr:l",
+	 .params       = "/fmt addr",
+	 .help	       = "translate guest physical 'addr' to host virtual address, only for debugging",
+	 .mhandler.cmd = do_gpa2hva,
+    },
+
+STEXI
+@item x-gpa2hva @var{addr}
+@findex x-gpa2hva
+Translate guest physical @var{addr} to host virtual address, only for debugging.
+ETEXI
+
+    {
         .name       = "x",
         .args_type  = "fmt:/,addr:l",
         .params     = "/fmt addr",
diff --git a/include/exec/memory.h b/include/exec/memory.h
old mode 100644
new mode 100755
index 0f07159..57d7bf8
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -222,6 +222,7 @@ struct MemoryListener {
                                hwaddr addr, hwaddr len);
     void (*coalesced_mmio_del)(MemoryListener *listener, MemoryRegionSection *section,
                                hwaddr addr, hwaddr len);
+    int  (*translate_gpa2hva)(MemoryListener *listener, uint64_t paddr, uint64_t *vaddr);
     /* Lower = earlier (during add), later (during del) */
     unsigned priority;
     AddressSpace *address_space_filter;
@@ -1123,6 +1124,7 @@ void memory_global_dirty_log_start(void);
 void memory_global_dirty_log_stop(void);
 
 void mtree_info(fprintf_function mon_printf, void *f);
+int  memory_translate_gpa2hva(hwaddr paddr, uint64_t *vaddr);
 
 /**
  * memory_region_dispatch_read: perform a read directly to the specified
diff --git a/kvm-all.c b/kvm-all.c
old mode 100644
new mode 100755
index c648b81..cb029be
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -197,6 +197,29 @@ static KVMSlot *kvm_lookup_overlapping_slot(KVMMemoryListener *kml,
     return found;
 }
 
+/* Map guest physical @paddr to the host virtual address backing it, using
+ * this listener's KVM slots.  Returns 0 and sets *@vaddr, or 1 if unmapped. */
+static int kvm_translate_gpa2hva(MemoryListener *listener, uint64_t paddr,
+                                 uint64_t *vaddr)
+{
+    KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+    int i;
+
+    for (i = 0; i < kvm_state->nr_slots; i++) {
+        KVMSlot *mem = &kml->slots[i];
+
+        /* Compare offsets so start_addr + memory_size cannot wrap around. */
+        if (paddr >= mem->start_addr &&
+            paddr - mem->start_addr < mem->memory_size) {
+            *vaddr = (uintptr_t)mem->ram + (paddr - mem->start_addr);
+            return 0;
+        }
+    }
+    fprintf(stderr, "fail to find target physical addr(0x%llx) "
+            "in KVM memory range\n", (unsigned long long)paddr);
+    return 1;
+}
+
 int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
                                        hwaddr *phys_addr)
 {
@@ -902,6 +925,7 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
     kml->listener.log_start = kvm_log_start;
     kml->listener.log_stop = kvm_log_stop;
     kml->listener.log_sync = kvm_log_sync;
+    kml->listener.translate_gpa2hva = kvm_translate_gpa2hva;
     kml->listener.priority = 10;
 
     memory_listener_register(&kml->listener, as);
diff --git a/memory.c b/memory.c
old mode 100644
new mode 100755
index e193658..979dcf8
--- a/memory.c
+++ b/memory.c
@@ -2294,6 +2294,19 @@ static const TypeInfo memory_region_info = {
     .instance_finalize  = memory_region_finalize,
 };
 
+/* Offer @paddr to each listener's translate_gpa2hva hook until one returns 0. */
+int memory_translate_gpa2hva(hwaddr paddr, uint64_t *vaddr)
+{
+    MemoryListener *listener = QTAILQ_FIRST(&memory_listeners);
+    int status = 1; /* returned as-is when no listener provides the hook */
+    for (; listener && status; listener = QTAILQ_NEXT(listener, link)) {
+        if (listener->translate_gpa2hva) {
+            status = listener->translate_gpa2hva(listener, paddr, vaddr);
+        }
+    }
+    return status;
+}
+
 static void memory_register_types(void)
 {
     type_register_static(&memory_region_info);
diff --git a/monitor.c b/monitor.c
old mode 100644
new mode 100755
index 9a35d72..408e1fa
--- a/monitor.c
+++ b/monitor.c
@@ -76,6 +76,7 @@
 #include "qapi-event.h"
 #include "qmp-introspect.h"
 #include "sysemu/block-backend.h"
+#include "exec/memory.h"
 
 /* for hmp_info_irq/pic */
 #if defined(TARGET_SPARC)
@@ -1681,6 +1682,21 @@ static void hmp_acl_remove(Monitor *mon, const QDict *qdict)
     }
 }
 
+/* HMP handler: print the host virtual address for guest physical 'addr'. */
+static void do_gpa2hva(Monitor *mon, const QDict *qdict)
+{
+    uint64_t paddr = qdict_get_int(qdict, "addr");
+    uint64_t vaddr;
+
+    if (memory_translate_gpa2hva(paddr, &vaddr)) {
+        monitor_printf(mon, "fail to translate gpa(0x%llx) to hva\n",
+                       (unsigned long long)paddr);
+        return;
+    }
+    /* Print all 64 bits; an unsigned long cast truncates on 32-bit hosts. */
+    monitor_printf(mon, "0x%llx\n", (unsigned long long)vaddr);
+}
+
 void qmp_getfd(const char *fdname, Error **errp)
 {
     mon_fd_t *monfd;
-- 
2.4.3

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-12-10 15:38 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-12-10 16:38 [Qemu-devel] [Patch V1 1/2] x86, mce: Basic support to add LMCE support to QEMU Ashok Raj
2015-12-10 16:38 ` [Qemu-devel] [Patch V0 2/2] x86, mce: Need to translate GPA to HPA to inject error in guest Ashok Raj

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).