public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC] nmi watchdog in kvm
@ 2008-01-28  8:18 Balaji Rao
       [not found] ` <200801281348.41010.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Balaji Rao @ 2008-01-28  8:18 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Hello,

I was trying to enable the use of nmi watchdog within a linux guest running in kvm. I have done it 
by allowing direct access to perfmon msrs using the MSR_BITMAP field in vmcs region.

Most of the times the NMI Watchdog Test in the guest fails, but with a finite number of NMI's 
received  by the guest. But randomly it does work! Whenever it fails, i get this vmwrite error :

vmwrite error: reg 4016 value 80000202 (err 164061)

I have a few questions.

1. How are NMI's supposed to be delivered to the guest ? I did this by adding a new op to
kvm_x86_ops.
2. How am I supposed to handle perfmon MSRs ? Direct access may pose problems during migration. But 
am not sure how costly emulation by abstraction would be..
I have not yet considered saving the MSRS upon vmexits to allow multiple VMs use the MSRs. I think i 
can do them easily when i get this working.

Here's the code. Please tell me what dumb mistake I am doing.


diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index c02541e..276048a 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -342,7 +342,7 @@ static const struct wd_ops k7_wd_ops = {
 #define P6_EVNTSEL_INT		(1 << 20)
 #define P6_EVNTSEL_OS		(1 << 17)
 #define P6_EVNTSEL_USR		(1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x3C
 #define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
 
 static int setup_p6_watchdog(unsigned nmi_hz)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2cbee94..73e9361 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -25,6 +25,8 @@
 #include <linux/hrtimer.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/notifier.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/page.h>
@@ -740,9 +742,12 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
 		break;
 
+	case APIC_LVTPC:
+		/* Enable PC NMI*/
+		if (val == APIC_DM_NMI)
+			apic_write(APIC_LVTPC,val);
 	case APIC_LVTT:
 	case APIC_LVTTHMR:
-	case APIC_LVTPC:
 	case APIC_LVT0:
 	case APIC_LVT1:
 	case APIC_LVTERR:
@@ -790,6 +795,18 @@ static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
 	return ret;
 }
 
+static int nmi_notify(struct notifier_block *self,unsigned long val, void *data) {
+
+        struct kvm *kvm;
+        kvm  = list_entry(vm_list.next, struct kvm, vm_list);
+	kvm_x86_ops->inject_nmi(kvm->vcpus[0]);
+        return NOTIFY_STOP;
+}
+
+static struct notifier_block nmi_notifier = {
+                .notifier_call = nmi_notify,
+};
+
 void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
 	if (!vcpu->arch.apic)
@@ -801,6 +818,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 		__free_page(vcpu->arch.apic->regs_page);
 
 	kfree(vcpu->arch.apic);
+	unregister_die_notifier(&nmi_notifier);
 }
 
 /*
@@ -1005,6 +1023,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	apic->dev.in_range = apic_mmio_range;
 	apic->dev.private = apic;
 
+	register_die_notifier(&nmi_notifier);
 	return 0;
 nomem_free_apic:
 	kfree(apic);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 00a00e4..fcffab1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -89,6 +89,7 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 
 static struct page *vmx_io_bitmap_a;
 static struct page *vmx_io_bitmap_b;
+static struct page *vmx_msr_bitmap;
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -982,7 +983,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	u32 _vmexit_control = 0;
 	u32 _vmentry_control = 0;
 
-	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+	min = PIN_BASED_EXT_INTR_MASK;
 	opt = 0;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
 				&_pin_based_exec_control) < 0)
@@ -994,8 +995,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	      CPU_BASED_CR8_STORE_EXITING |
 #endif
 	      CPU_BASED_USE_IO_BITMAPS |
+	      CPU_BASED_USE_MSR_BITMAPS |
 	      CPU_BASED_MOV_DR_EXITING |
 	      CPU_BASED_USE_TSC_OFFSETING;
+
 	opt = CPU_BASED_TPR_SHADOW |
 	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
@@ -1568,6 +1571,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
 	vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
 
+	/* MSR BITMAP */
+	vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap));
+
 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
 	/* Control */
@@ -1786,6 +1792,14 @@ out:
 	return ret;
 }
 
+static void vmx_inject_nmi(struct kvm_vcpu *vcpu) {
+
+	struct vcpu_vmx * vmx = to_vmx(vcpu);
+		if (vmx->launched)
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			2 | INTR_TYPE_NMI | INTR_INFO_VALID_MASK);
+}
+
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2686,6 +2700,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.exception_injected = vmx_exception_injected,
 	.inject_pending_irq = vmx_intr_assist,
 	.inject_pending_vectors = do_interrupt_requests,
+	.inject_nmi = vmx_inject_nmi,
 
 	.set_tss_addr = vmx_set_tss_addr,
 };
@@ -2700,7 +2715,11 @@ static int __init vmx_init(void)
 		return -ENOMEM;
 
 	vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
-	if (!vmx_io_bitmap_b) {
+	if (!vmx_io_bitmap_b)
+		r = -ENOMEM;
+
+	vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+	if (!vmx_msr_bitmap) {
 		r = -ENOMEM;
 		goto out;
 	}
@@ -2718,6 +2737,15 @@ static int __init vmx_init(void)
 	memset(iova, 0xff, PAGE_SIZE);
 	kunmap(vmx_io_bitmap_b);
 
+	iova = kmap(vmx_msr_bitmap);
+	memset(iova, 0xff, PAGE_SIZE);
+	/* Enable direct access to first perfmon MSR */
+	clear_bit(MSR_P6_PERFCTR0, iova);
+	clear_bit(MSR_P6_EVNTSEL0, iova);
+	clear_bit(MSR_P6_PERFCTR0, iova + 2048);
+	clear_bit(MSR_P6_EVNTSEL0, iova + 2048);
+	kunmap(vmx_msr_bitmap);
+
 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
@@ -2730,8 +2758,9 @@ static int __init vmx_init(void)
 	return 0;
 
 out1:
-	__free_page(vmx_io_bitmap_b);
+	__free_page(vmx_msr_bitmap);
 out:
+	__free_page(vmx_io_bitmap_b);
 	__free_page(vmx_io_bitmap_a);
 	return r;
 }
@@ -2740,6 +2769,7 @@ static void __exit vmx_exit(void)
 {
 	__free_page(vmx_io_bitmap_b);
 	__free_page(vmx_io_bitmap_a);
+	__free_page(vmx_msr_bitmap);
 
 	kvm_exit();
 }
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 436ce0f..1b6d6a8 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -242,6 +242,7 @@ enum vmcs_field {
 #define VECTORING_INFO_VALID_MASK       	INTR_INFO_VALID_MASK
 
 #define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
+#define INTR_TYPE_NMI			(2 << 8)
 #define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
 #define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
 
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 67ae307..f17248d 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -387,6 +387,7 @@ struct kvm_x86_ops {
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
 	bool (*exception_injected)(struct kvm_vcpu *vcpu);
+	void (*inject_nmi)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
 				       struct kvm_run *run);
---

thanks,
balaji rao

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply related	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-01-30 15:13 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-28  8:18 [RFC] nmi watchdog in kvm Balaji Rao
     [not found] ` <200801281348.41010.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2008-01-28 10:57   ` Ingo Molnar
2008-01-28 12:29   ` Joerg Roedel
     [not found]     ` <20080128122925.GA11803-5C7GfCeVMHo@public.gmane.org>
2008-01-28 14:42       ` Balaji Rao
     [not found]         ` <200801282012.26225.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2008-01-28 15:00           ` Joerg Roedel
     [not found]             ` <20080128150032.GF6960-5C7GfCeVMHo@public.gmane.org>
2008-01-28 16:30               ` Balaji Rao
2008-01-30 10:43   ` Avi Kivity
     [not found]     ` <47A054EE.6010408-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-30 14:22       ` Balaji Rao
     [not found]         ` <200801301952.57604.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2008-01-30 14:39           ` Avi Kivity
     [not found]             ` <47A08C24.8040505-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-30 15:02               ` Balaji Rao
     [not found]                 ` <200801302032.01603.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2008-01-30 15:13                   ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox