* [Qemu-devel] [Patch V1 1/2] x86, mce: Basic support to add LMCE support to QEMU
@ 2015-12-10 16:38 Ashok Raj
2015-12-10 16:38 ` [Qemu-devel] [Patch V0 2/2] x86, mce: Need to translate GPA to HPA to inject error in guest Ashok Raj
0 siblings, 1 reply; 2+ messages in thread
From: Ashok Raj @ 2015-12-10 16:38 UTC (permalink / raw)
To: qemu-devel
Cc: Tony Luck, Ashok Raj, Gleb Natapov, Gong Chen, Andi Kleen,
Paolo Bonzini, Boris Petkov
This patch adds the basic enumeration and control MSRs required to support
Local Machine Check Exception (LMCE).
- Added Local Machine Check definitions, changed MCG_CAP
- Added support for IA32_FEATURE_CONTROL.
- When delivering an MCE to the guest, we deliver it to just a single CPU
when the guest OS has opted in to local delivery.
Also tested:
- A legacy guest that doesn't support recovery would panic.
- A platform that supports recovery but not LMCE: KVM can emulate LMCE for the guest.
The platform MCE is broadcast, but we inject the MCE only into the one thread that
encountered the fault.
Reviewed-by: Andi Kleen <andi.kleen@intel.com>
Signed-off-by: Ashok Raj <ashok.raj@intel.com>
Tested-by: Gong Chen <gong.chen@intel.com>
---
V2 patchset:
Removed a line based on Paolo's suggestion. Gong verified the new
patch works.
target-i386/cpu.c | 8 ++++++++
target-i386/cpu.h | 8 ++++++--
target-i386/kvm.c | 38 +++++++++++++++++++++++++++++++-------
3 files changed, 45 insertions(+), 9 deletions(-)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 11e5e39..167669a 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2737,6 +2737,13 @@ static void mce_init(X86CPU *cpu)
}
}
+static void feature_control_init(X86CPU *cpu)
+{
+ CPUX86State *cenv = &cpu->env;
+
+ cenv->msr_ia32_feature_control = ((1<<20) | (1<<0));
+}
+
#ifndef CONFIG_USER_ONLY
static void x86_cpu_apic_create(X86CPU *cpu, Error **errp)
{
@@ -2858,6 +2865,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp)
#endif
mce_init(cpu);
+ feature_control_init(cpu);
#ifndef CONFIG_USER_ONLY
if (tcg_enabled()) {
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 84edfd0..a567d7a 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -282,8 +282,9 @@
#define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */
#define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */
+#define MCG_LMCE_P (1ULL<<27) /* Local Machine Check Supported */
-#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P)
+#define MCE_CAP_DEF (MCG_CTL_P|MCG_SER_P|MCG_LMCE_P)
#define MCE_BANKS_DEF 10
#define MCG_CAP_BANKS_MASK 0xff
@@ -291,6 +292,7 @@
#define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */
#define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */
#define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */
+#define MCG_STATUS_LMCE (1ULL<<3) /* Local MCE signaled */
#define MCI_STATUS_VAL (1ULL<<63) /* valid error */
#define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */
@@ -333,6 +335,7 @@
#define MSR_MCG_CAP 0x179
#define MSR_MCG_STATUS 0x17a
#define MSR_MCG_CTL 0x17b
+#define MSR_MCG_EXT_CTL 0x4d0
#define MSR_P6_EVNTSEL0 0x186
@@ -892,7 +895,6 @@ typedef struct CPUX86State {
uint64_t mcg_status;
uint64_t msr_ia32_misc_enable;
- uint64_t msr_ia32_feature_control;
uint64_t msr_fixed_ctr_ctrl;
uint64_t msr_global_ctrl;
@@ -977,8 +979,10 @@ typedef struct CPUX86State {
int64_t tsc_khz;
void *kvm_xsave_buf;
+ uint64_t msr_ia32_feature_control;
uint64_t mcg_cap;
uint64_t mcg_ctl;
+ uint64_t mcg_ext_ctl;
uint64_t mce_banks[MCE_BANKS_DEF*4];
uint64_t tsc_aux;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 6dc9846..c61fe1f 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -72,6 +72,7 @@ static bool has_msr_tsc_aux;
static bool has_msr_tsc_adjust;
static bool has_msr_tsc_deadline;
static bool has_msr_feature_control;
+static bool has_msr_ext_mcg_ctl;
static bool has_msr_async_pf_en;
static bool has_msr_pv_eoi_en;
static bool has_msr_misc_enable;
@@ -370,18 +371,30 @@ static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code)
uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S;
uint64_t mcg_status = MCG_STATUS_MCIP;
+ int flags = 0;
+ CPUState *cs = CPU(cpu);
+
+ /*
+ * Read back the value of MSR_MCG_EXT_CTL that was set by the guest
+ * kernel, so that QEMU sees whether the guest has opted in to LMCE.
+ */
+ cpu_synchronize_state(cs);
+
+ flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0;
if (code == BUS_MCEERR_AR) {
- status |= MCI_STATUS_AR | 0x134;
- mcg_status |= MCG_STATUS_EIPV;
+ status |= MCI_STATUS_AR | 0x134;
+ mcg_status |= MCG_STATUS_EIPV;
+ if (env->mcg_ext_ctl & 0x1) {
+ mcg_status |= MCG_STATUS_LMCE;
+ flags = 0; /* No Broadcast when LMCE is opted by guest */
+ }
} else {
status |= 0xc0;
mcg_status |= MCG_STATUS_RIPV;
}
cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr,
- (MCM_ADDR_PHYS << 6) | 0xc,
- cpu_x86_support_mca_broadcast(env) ?
- MCE_INJECT_BROADCAST : 0);
+ (MCM_ADDR_PHYS << 6) | 0xc, flags);
}
static void hardware_memory_error(void)
@@ -808,10 +821,14 @@ int kvm_arch_init_vcpu(CPUState *cs)
c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0);
if (c) {
- has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) ||
- !!(c->ecx & CPUID_EXT_SMX);
+ has_msr_feature_control = !!((c->ecx & CPUID_EXT_VMX) ||
+ !!(c->ecx & CPUID_EXT_SMX) ||
+ !!(env->mcg_cap & MCG_LMCE_P));
}
+ if (has_msr_feature_control && (env->mcg_cap & MCG_LMCE_P))
+ has_msr_ext_mcg_ctl = true;
+
c = cpuid_find_entry(&cpuid_data.cpuid, 0x80000007, 0);
if (c && (c->edx & 1<<8) && invtsc_mig_blocker == NULL) {
/* for migration */
@@ -1557,6 +1574,7 @@ static int kvm_put_msrs(X86CPU *cpu, int level)
kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
kvm_msr_entry_set(&msrs[n++], MSR_MCG_CTL, env->mcg_ctl);
+ kvm_msr_entry_set(&msrs[n++], MSR_MCG_EXT_CTL, env->mcg_ext_ctl);
for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) {
kvm_msr_entry_set(&msrs[n++], MSR_MC0_CTL + i, env->mce_banks[i]);
}
@@ -1811,6 +1829,9 @@ static int kvm_get_msrs(X86CPU *cpu)
if (has_msr_feature_control) {
msrs[n++].index = MSR_IA32_FEATURE_CONTROL;
}
+ if (has_msr_ext_mcg_ctl) {
+ msrs[n++].index = MSR_MCG_EXT_CTL;
+ }
if (has_msr_bndcfgs) {
msrs[n++].index = MSR_IA32_BNDCFGS;
}
@@ -1981,6 +2002,9 @@ static int kvm_get_msrs(X86CPU *cpu)
case MSR_IA32_FEATURE_CONTROL:
env->msr_ia32_feature_control = msrs[i].data;
break;
+ case MSR_MCG_EXT_CTL:
+ env->mcg_ext_ctl = msrs[i].data;
+ break;
case MSR_IA32_BNDCFGS:
env->msr_bndcfgs = msrs[i].data;
break;
--
2.4.3
^ permalink raw reply related [flat|nested] 2+ messages in thread
* [Qemu-devel] [Patch V0 2/2] x86, mce: Need to translate GPA to HPA to inject error in guest.
2015-12-10 16:38 [Qemu-devel] [Patch V1 1/2] x86, mce: Basic support to add LMCE support to QEMU Ashok Raj
@ 2015-12-10 16:38 ` Ashok Raj
0 siblings, 0 replies; 2+ messages in thread
From: Ashok Raj @ 2015-12-10 16:38 UTC (permalink / raw)
To: qemu-devel
Cc: Tony Luck, root, Ashok Raj, Gleb Natapov, Gong Chen, Andi Kleen,
Paolo Bonzini, Boris Petkov
From: root <root@lmcesrv.bj.intel.com>
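When we need to test error injection at a specific address using EINJ,
there needs to be a way to translate a GPA to an HPA. This patch adds the
x-gpa2hva monitor command, which reports the host virtual address backing a
guest physical address. This allows host EINJ to inject an error so that we
can test how the guest behaves when a bad address is consumed.
This permits the guest OS to perform its own recovery.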
Signed-off-by: Gong Chen <gong.chen@intel.com>
---
hmp-commands.hx | 14 ++++++++++++++
include/exec/memory.h | 2 ++
kvm-all.c | 24 ++++++++++++++++++++++++
memory.c | 13 +++++++++++++
monitor.c | 16 ++++++++++++++++
5 files changed, 69 insertions(+)
mode change 100644 => 100755 include/exec/memory.h
mode change 100644 => 100755 kvm-all.c
mode change 100644 => 100755 memory.c
mode change 100644 => 100755 monitor.c
diff --git a/hmp-commands.hx b/hmp-commands.hx
index bb52e4d..673c00e 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -444,6 +444,20 @@ Start gdbserver session (default @var{port}=1234)
ETEXI
{
+ .name = "x-gpa2hva",
+ .args_type = "fmt:/,addr:l",
+ .params = "/fmt addr",
+ .help = "translate guest physical 'addr' to host virtual address, only for debugging",
+ .mhandler.cmd = do_gpa2hva,
+ },
+
+STEXI
+@item x-gpa2hva @var{addr}
+@findex x-gpa2hva
+Translate guest physical @var{addr} to host virtual address, only for debugging.
+ETEXI
+
+ {
.name = "x",
.args_type = "fmt:/,addr:l",
.params = "/fmt addr",
diff --git a/include/exec/memory.h b/include/exec/memory.h
old mode 100644
new mode 100755
index 0f07159..57d7bf8
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -222,6 +222,7 @@ struct MemoryListener {
hwaddr addr, hwaddr len);
void (*coalesced_mmio_del)(MemoryListener *listener, MemoryRegionSection *section,
hwaddr addr, hwaddr len);
+ int (*translate_gpa2hva)(MemoryListener *listener, uint64_t paddr, uint64_t *vaddr);
/* Lower = earlier (during add), later (during del) */
unsigned priority;
AddressSpace *address_space_filter;
@@ -1123,6 +1124,7 @@ void memory_global_dirty_log_start(void);
void memory_global_dirty_log_stop(void);
void mtree_info(fprintf_function mon_printf, void *f);
+int memory_translate_gpa2hva(hwaddr paddr, uint64_t *vaddr);
/**
* memory_region_dispatch_read: perform a read directly to the specified
diff --git a/kvm-all.c b/kvm-all.c
old mode 100644
new mode 100755
index c648b81..cb029be
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -197,6 +197,29 @@ static KVMSlot *kvm_lookup_overlapping_slot(KVMMemoryListener *kml,
return found;
}
+
+static int kvm_translate_gpa2hva(MemoryListener *listener, uint64_t paddr, uint64_t *vaddr)
+{
+ KVMState *s = kvm_state;
+ KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+ KVMSlot *mem = NULL;
+ int i;
+
+ for (i = 0; i < s->nr_slots; i++) {
+ mem = &kml->slots[i];
+ if (paddr >= mem->start_addr && paddr < mem->start_addr + mem->memory_size) {
+ *vaddr = (uint64_t)mem->ram + paddr - mem->start_addr;
+ break;
+ }
+ }
+
+ if (i == s->nr_slots) {
+ fprintf(stderr, "fail to find target physical addr(%ld) in KVM memory range\n", paddr);
+ return 1;
+ }
+ return 0;
+}
+
int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
hwaddr *phys_addr)
{
@@ -902,6 +925,7 @@ void kvm_memory_listener_register(KVMState *s, KVMMemoryListener *kml,
kml->listener.log_start = kvm_log_start;
kml->listener.log_stop = kvm_log_stop;
kml->listener.log_sync = kvm_log_sync;
+ kml->listener.translate_gpa2hva = kvm_translate_gpa2hva;
kml->listener.priority = 10;
memory_listener_register(&kml->listener, as);
diff --git a/memory.c b/memory.c
old mode 100644
new mode 100755
index e193658..979dcf8
--- a/memory.c
+++ b/memory.c
@@ -2294,6 +2294,19 @@ static const TypeInfo memory_region_info = {
.instance_finalize = memory_region_finalize,
};
+int memory_translate_gpa2hva(hwaddr paddr, uint64_t *vaddr){
+ MemoryListener *ml = NULL;
+ int ret = 1;
+
+ QTAILQ_FOREACH(ml, &memory_listeners, link) {
+ if(ml->translate_gpa2hva)
+ ret = ml->translate_gpa2hva(ml, paddr, vaddr);
+ if(0 == ret)
+ break;
+ }
+ return ret;
+}
+
static void memory_register_types(void)
{
type_register_static(&memory_region_info);
diff --git a/monitor.c b/monitor.c
old mode 100644
new mode 100755
index 9a35d72..408e1fa
--- a/monitor.c
+++ b/monitor.c
@@ -76,6 +76,7 @@
#include "qapi-event.h"
#include "qmp-introspect.h"
#include "sysemu/block-backend.h"
+#include "exec/memory.h"
/* for hmp_info_irq/pic */
#if defined(TARGET_SPARC)
@@ -1681,6 +1682,21 @@ static void hmp_acl_remove(Monitor *mon, const QDict *qdict)
}
}
+static void do_gpa2hva(Monitor *mon, const QDict *qdict)
+{
+ uint64_t paddr;
+ uint64_t vaddr;
+
+ paddr = qdict_get_int(qdict, "addr");
+ if (memory_translate_gpa2hva(paddr, &vaddr)){
+ monitor_printf(mon, "fail to translate gpa(0x%lx) to hva\n", paddr);
+ return;
+ }
+
+ monitor_printf(mon, "0x%lx\n", (unsigned long)vaddr);
+ return;
+}
+
void qmp_getfd(const char *fdname, Error **errp)
{
mon_fd_t *monfd;
--
2.4.3
^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2015-12-10 15:38 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-10 16:38 [Qemu-devel] [Patch V1 1/2] x86, mce: Basic support to add LMCE support to QEMU Ashok Raj
2015-12-10 16:38 ` [Qemu-devel] [Patch V0 2/2] x86, mce: Need to translate GPA to HPA to inject error in guest Ashok Raj
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).