public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC] nmi watchdog in kvm
@ 2008-01-28  8:18 Balaji Rao
       [not found] ` <200801281348.41010.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Balaji Rao @ 2008-01-28  8:18 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Hello,

I was trying to enable the use of nmi watchdog within a linux guest running in kvm. I have done it 
by allowing direct access to perfmon msrs using the MSR_BITMAP field in vmcs region.

Most of the times the NMI Watchdog Test in the guest fails, but with a finite number of NMI's 
received  by the guest. But randomly it does work! Whenever it fails, i get this vmwrite error :

vmwrite error: reg 4016 value 80000202 (err 164061)

I have a few questions.

1. How are NMI's supposed to be delivered to the guest ? I did this by adding a new op to
kvm_x86_ops.
2. How am I supposed to handle perfmon MSRs ? Direct access may pose problems during migration. But 
am not sure how costly emulation by abstraction would be..
I have not yet considered saving the MSRS upon vmexits to allow multiple VMs use the MSRs. I think i 
can do them easily when i get this working.

Here's the code. Please tell me what dumb mistake I am doing.


diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index c02541e..276048a 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -342,7 +342,7 @@ static const struct wd_ops k7_wd_ops = {
 #define P6_EVNTSEL_INT		(1 << 20)
 #define P6_EVNTSEL_OS		(1 << 17)
 #define P6_EVNTSEL_USR		(1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x3C
 #define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
 
 static int setup_p6_watchdog(unsigned nmi_hz)
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 2cbee94..73e9361 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -25,6 +25,8 @@
 #include <linux/hrtimer.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/kdebug.h>
+#include <linux/notifier.h>
 #include <asm/processor.h>
 #include <asm/msr.h>
 #include <asm/page.h>
@@ -740,9 +742,12 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
 		break;
 
+	case APIC_LVTPC:
+		/* Enable PC NMI*/
+		if (val == APIC_DM_NMI)
+			apic_write(APIC_LVTPC,val);
 	case APIC_LVTT:
 	case APIC_LVTTHMR:
-	case APIC_LVTPC:
 	case APIC_LVT0:
 	case APIC_LVT1:
 	case APIC_LVTERR:
@@ -790,6 +795,18 @@ static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
 	return ret;
 }
 
+static int nmi_notify(struct notifier_block *self,unsigned long val, void *data) {
+
+        struct kvm *kvm;
+        kvm  = list_entry(vm_list.next, struct kvm, vm_list);
+	kvm_x86_ops->inject_nmi(kvm->vcpus[0]);
+        return NOTIFY_STOP;
+}
+
+static struct notifier_block nmi_notifier = {
+                .notifier_call = nmi_notify,
+};
+
 void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
 	if (!vcpu->arch.apic)
@@ -801,6 +818,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 		__free_page(vcpu->arch.apic->regs_page);
 
 	kfree(vcpu->arch.apic);
+	unregister_die_notifier(&nmi_notifier);
 }
 
 /*
@@ -1005,6 +1023,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	apic->dev.in_range = apic_mmio_range;
 	apic->dev.private = apic;
 
+	register_die_notifier(&nmi_notifier);
 	return 0;
 nomem_free_apic:
 	kfree(apic);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 00a00e4..fcffab1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -89,6 +89,7 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 
 static struct page *vmx_io_bitmap_a;
 static struct page *vmx_io_bitmap_b;
+static struct page *vmx_msr_bitmap;
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -982,7 +983,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	u32 _vmexit_control = 0;
 	u32 _vmentry_control = 0;
 
-	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+	min = PIN_BASED_EXT_INTR_MASK;
 	opt = 0;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
 				&_pin_based_exec_control) < 0)
@@ -994,8 +995,10 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 	      CPU_BASED_CR8_STORE_EXITING |
 #endif
 	      CPU_BASED_USE_IO_BITMAPS |
+	      CPU_BASED_USE_MSR_BITMAPS |
 	      CPU_BASED_MOV_DR_EXITING |
 	      CPU_BASED_USE_TSC_OFFSETING;
+
 	opt = CPU_BASED_TPR_SHADOW |
 	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
@@ -1568,6 +1571,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 	vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a));
 	vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b));
 
+	/* MSR BITMAP */
+	vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap));
+
 	vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
 
 	/* Control */
@@ -1786,6 +1792,14 @@ out:
 	return ret;
 }
 
+static void vmx_inject_nmi(struct kvm_vcpu *vcpu) {
+
+	struct vcpu_vmx * vmx = to_vmx(vcpu);
+		if (vmx->launched)
+		vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+			2 | INTR_TYPE_NMI | INTR_INFO_VALID_MASK);
+}
+
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2686,6 +2700,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.exception_injected = vmx_exception_injected,
 	.inject_pending_irq = vmx_intr_assist,
 	.inject_pending_vectors = do_interrupt_requests,
+	.inject_nmi = vmx_inject_nmi,
 
 	.set_tss_addr = vmx_set_tss_addr,
 };
@@ -2700,7 +2715,11 @@ static int __init vmx_init(void)
 		return -ENOMEM;
 
 	vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
-	if (!vmx_io_bitmap_b) {
+	if (!vmx_io_bitmap_b)
+		r = -ENOMEM;
+
+	vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
+	if (!vmx_msr_bitmap) {
 		r = -ENOMEM;
 		goto out;
 	}
@@ -2718,6 +2737,15 @@ static int __init vmx_init(void)
 	memset(iova, 0xff, PAGE_SIZE);
 	kunmap(vmx_io_bitmap_b);
 
+	iova = kmap(vmx_msr_bitmap);
+	memset(iova, 0xff, PAGE_SIZE);
+	/* Enable direct access to first perfmon MSR */
+	clear_bit(MSR_P6_PERFCTR0, iova);
+	clear_bit(MSR_P6_EVNTSEL0, iova);
+	clear_bit(MSR_P6_PERFCTR0, iova + 2048);
+	clear_bit(MSR_P6_EVNTSEL0, iova + 2048);
+	kunmap(vmx_msr_bitmap);
+
 	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
 
 	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE);
@@ -2730,8 +2758,9 @@ static int __init vmx_init(void)
 	return 0;
 
 out1:
-	__free_page(vmx_io_bitmap_b);
+	__free_page(vmx_msr_bitmap);
 out:
+	__free_page(vmx_io_bitmap_b);
 	__free_page(vmx_io_bitmap_a);
 	return r;
 }
@@ -2740,6 +2769,7 @@ static void __exit vmx_exit(void)
 {
 	__free_page(vmx_io_bitmap_b);
 	__free_page(vmx_io_bitmap_a);
+	__free_page(vmx_msr_bitmap);
 
 	kvm_exit();
 }
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 436ce0f..1b6d6a8 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -242,6 +242,7 @@ enum vmcs_field {
 #define VECTORING_INFO_VALID_MASK       	INTR_INFO_VALID_MASK
 
 #define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
+#define INTR_TYPE_NMI			(2 << 8)
 #define INTR_TYPE_EXCEPTION             (3 << 8) /* processor exception */
 #define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
 
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 67ae307..f17248d 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -387,6 +387,7 @@ struct kvm_x86_ops {
 	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
 				bool has_error_code, u32 error_code);
 	bool (*exception_injected)(struct kvm_vcpu *vcpu);
+	void (*inject_nmi)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
 	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
 				       struct kvm_run *run);
---

thanks,
balaji rao

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [RFC] nmi watchdog in kvm
       [not found] ` <200801281348.41010.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2008-01-28 10:57   ` Ingo Molnar
  2008-01-28 12:29   ` Joerg Roedel
  2008-01-30 10:43   ` Avi Kivity
  2 siblings, 0 replies; 11+ messages in thread
From: Ingo Molnar @ 2008-01-28 10:57 UTC (permalink / raw)
  To: Balaji Rao; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f


* Balaji Rao <balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:

> I was trying to enable the use of nmi watchdog within a linux guest 
> running in kvm. I have done it by allowing direct access to perfmon 
> msrs using the MSR_BITMAP field in vmcs region.

your patch looks pretty clean and simple to me - and it would make quite 
a bit of sense from the "completeness of architecture" POV to inject 
NMIs into KVM - even though it's obviously not that hard to interrupt a 
KVM guest context via other means ;-)

at the least this would test the correctness of guest kernel NMI 
handling. We could even test things by injecting a very large stream of 
NMIs, which is not that easy on native hardware, etc.

( the emulation of "NMI reason" IO port could be accelerated too 
  perhaps, instead of passing it over to qemu. )

so it's a good idea IMO.

	Ingo

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] nmi watchdog in kvm
       [not found] ` <200801281348.41010.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2008-01-28 10:57   ` Ingo Molnar
@ 2008-01-28 12:29   ` Joerg Roedel
       [not found]     ` <20080128122925.GA11803-5C7GfCeVMHo@public.gmane.org>
  2008-01-30 10:43   ` Avi Kivity
  2 siblings, 1 reply; 11+ messages in thread
From: Joerg Roedel @ 2008-01-28 12:29 UTC (permalink / raw)
  To: Balaji Rao; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

On Mon, Jan 28, 2008 at 01:48:40PM +0530, Balaji Rao wrote:
> Hello,
> 
> I was trying to enable the use of nmi watchdog within a linux guest running in kvm. I have done it 
> by allowing direct access to perfmon msrs using the MSR_BITMAP field in vmcs region.

Is there a proper virtualization of the performance counter MSRs on
Intel? If not this patch will destroy any host side performance
monitoring.

> Most of the times the NMI Watchdog Test in the guest fails, but with a finite number of NMI's 
> received  by the guest. But randomly it does work! Whenever it fails, i get this vmwrite error :
> 
> vmwrite error: reg 4016 value 80000202 (err 164061)
> 
> I have a few questions.
> 
> 1. How are NMI's supposed to be delivered to the guest ? I did this by adding a new op to
> kvm_x86_ops.
> 2. How am I supposed to handle perfmon MSRs ? Direct access may pose problems during migration. But 
> am not sure how costly emulation by abstraction would be..
> I have not yet considered saving the MSRS upon vmexits to allow multiple VMs use the MSRs. I think i 
> can do them easily when i get this working.

This patch kills the ability of KVM to migrate between AMD and Intel
because the Intel performance counters are not available on AMD and vice
verca.

> diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> index 67ae307..f17248d 100644
> --- a/include/asm-x86/kvm_host.h
> +++ b/include/asm-x86/kvm_host.h
> @@ -387,6 +387,7 @@ struct kvm_x86_ops {
>  	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
>  				bool has_error_code, u32 error_code);
>  	bool (*exception_injected)(struct kvm_vcpu *vcpu);
> +	void (*inject_nmi)(struct kvm_vcpu *vcpu);

The implementation of this new callback for SVM is missing.

>  	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
>  	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
>  				       struct kvm_run *run);
> ---

-- 
           |           AMD Saxony Limited Liability Company & Co. KG
 Operating |         Wilschdorfer Landstr. 101, 01109 Dresden, Germany
 System    |                  Register Court Dresden: HRA 4896
 Research  |              General Partner authorized to represent:
 Center    |             AMD Saxony LLC (Wilmington, Delaware, US)
           | General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy



-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] nmi watchdog in kvm
       [not found]     ` <20080128122925.GA11803-5C7GfCeVMHo@public.gmane.org>
@ 2008-01-28 14:42       ` Balaji Rao
       [not found]         ` <200801282012.26225.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Balaji Rao @ 2008-01-28 14:42 UTC (permalink / raw)
  To: Joerg Roedel
  Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	balajirrao-Re5JQEeQqe8AvxtiuMwx3w

On Monday 28 January 2008 05:59:25 pm Joerg Roedel wrote:
> On Mon, Jan 28, 2008 at 01:48:40PM +0530, Balaji Rao wrote:
> > Hello,
> >
> > I was trying to enable the use of nmi watchdog within a linux guest
> > running in kvm. I have done it by allowing direct access to perfmon msrs
> > using the MSR_BITMAP field in vmcs region.
>
> Is there a proper virtualization of the performance counter MSRs on
> Intel? If not this patch will destroy any host side performance
> monitoring.
>
No. It has to be handled by the VMM. I shall do this in a later patch. This is 
just a quick and dirty attempt to solve this problem.

<snip>

> This patch kills the ability of KVM to migrate between AMD and Intel
> because the Intel performance counters are not available on AMD and vice
> verca.
>
Yes. The way we should solve this is by emulating the MSRs. Am not sure about 
the overhead involved. What are your thoughts on this ?

> > diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> > index 67ae307..f17248d 100644
> > --- a/include/asm-x86/kvm_host.h
> > +++ b/include/asm-x86/kvm_host.h
> > @@ -387,6 +387,7 @@ struct kvm_x86_ops {
> >  	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
> >  				bool has_error_code, u32 error_code);
> >  	bool (*exception_injected)(struct kvm_vcpu *vcpu);
> > +	void (*inject_nmi)(struct kvm_vcpu *vcpu);
>
> The implementation of this new callback for SVM is missing.
I just wanted to get it running on my hardware first! :) Will implement it for 
SVM once I get the approach right.
>
> >  	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
> >  	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
> >  				       struct kvm_run *run);
> > ---

thank you for the comments,

regards,
balaji rao


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] nmi watchdog in kvm
       [not found]         ` <200801282012.26225.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2008-01-28 15:00           ` Joerg Roedel
       [not found]             ` <20080128150032.GF6960-5C7GfCeVMHo@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Joerg Roedel @ 2008-01-28 15:00 UTC (permalink / raw)
  To: Balaji Rao; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

On Mon, Jan 28, 2008 at 08:12:26PM +0530, Balaji Rao wrote:
> On Monday 28 January 2008 05:59:25 pm Joerg Roedel wrote:
> > This patch kills the ability of KVM to migrate between AMD and Intel
> > because the Intel performance counters are not available on AMD and vice
> > verca.
> >
> Yes. The way we should solve this is by emulating the MSRs. Am not sure about 
> the overhead involved. What are your thoughts on this ?

I think you can't do this with performance counters in KVM (except you
emulate the Intel PerfCtrs on AMD and the AMD PerfCtrs on Intel, which
might not be possible, at least very painfull). I think the best
solution to implement a watchdog for KVM is to implement another NMI
source, e.g. emulating a watchdog PCI card in QEMU.

> > > diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
> > > index 67ae307..f17248d 100644
> > > --- a/include/asm-x86/kvm_host.h
> > > +++ b/include/asm-x86/kvm_host.h
> > > @@ -387,6 +387,7 @@ struct kvm_x86_ops {
> > >  	void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
> > >  				bool has_error_code, u32 error_code);
> > >  	bool (*exception_injected)(struct kvm_vcpu *vcpu);
> > > +	void (*inject_nmi)(struct kvm_vcpu *vcpu);
> >
> > The implementation of this new callback for SVM is missing.
> I just wanted to get it running on my hardware first! :) Will implement it for 
> SVM once I get the approach right.
> >
> > >  	void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
> > >  	void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
> > >  				       struct kvm_run *run);
> > > ---
> 
> thank you for the comments,
> 
> regards,
> balaji rao
> 
> 
> 

-- 
           |           AMD Saxony Limited Liability Company & Co. KG
 Operating |         Wilschdorfer Landstr. 101, 01109 Dresden, Germany
 System    |                  Register Court Dresden: HRA 4896
 Research  |              General Partner authorized to represent:
 Center    |             AMD Saxony LLC (Wilmington, Delaware, US)
           | General Manager of AMD Saxony LLC: Dr. Hans-R. Deppe, Thomas McCoy



-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] nmi watchdog in kvm
       [not found]             ` <20080128150032.GF6960-5C7GfCeVMHo@public.gmane.org>
@ 2008-01-28 16:30               ` Balaji Rao
  0 siblings, 0 replies; 11+ messages in thread
From: Balaji Rao @ 2008-01-28 16:30 UTC (permalink / raw)
  To: Joerg Roedel; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

On Monday 28 January 2008 08:30:32 pm Joerg Roedel wrote:
> On Mon, Jan 28, 2008 at 08:12:26PM +0530, Balaji Rao wrote:
> > On Monday 28 January 2008 05:59:25 pm Joerg Roedel wrote:
> > > This patch kills the ability of KVM to migrate between AMD and Intel
> > > because the Intel performance counters are not available on AMD and
> > > vice verca.
> >
> > Yes. The way we should solve this is by emulating the MSRs. Am not sure
> > about the overhead involved. What are your thoughts on this ?
>
> I think you can't do this with performance counters in KVM (except you
> emulate the Intel PerfCtrs on AMD and the AMD PerfCtrs on Intel, which
> might not be possible, at least very painfull). I think the best
> solution to implement a watchdog for KVM is to implement another NMI
> source, e.g. emulating a watchdog PCI card in QEMU.
>
Yes, it would be painful to emulate non native perform counters.. 

But having a PCI device for generating NMIs would solve the problem of NMI 
Watchdogs. But we still wouldn't be able to provide perfmon counters to the 
guest.. But before that, we need to decide whether it is needed to provide 
perfmon counters to the guest or not. 

To make the guest use QEmu's PCI we need to pass another kernel param (say, 
nmi_watchdog=3) which is not very nice.So, can't we implement NMI watchdog 
using paravirt ? 

Or the other option would be to probe for the NMI Watchdog PCI card before 
trying to use other means of generating NMIs..

Could anyone please point me to any docs on writing a QEmu PCI device driver ?

regards,
balaji rao

-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] nmi watchdog in kvm
       [not found] ` <200801281348.41010.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  2008-01-28 10:57   ` Ingo Molnar
  2008-01-28 12:29   ` Joerg Roedel
@ 2008-01-30 10:43   ` Avi Kivity
       [not found]     ` <47A054EE.6010408-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  2 siblings, 1 reply; 11+ messages in thread
From: Avi Kivity @ 2008-01-30 10:43 UTC (permalink / raw)
  To: Balaji Rao; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Balaji Rao wrote:
> Hello,
>
> I was trying to enable the use of nmi watchdog within a linux guest running in kvm. I have done it 
> by allowing direct access to perfmon msrs using the MSR_BITMAP field in vmcs region.
>
> Most of the times the NMI Watchdog Test in the guest fails, but with a finite number of NMI's 
> received  by the guest. But randomly it does work! Whenever it fails, i get this vmwrite error :
>
> vmwrite error: reg 4016 value 80000202 (err 164061)
>
> I have a few questions.
>
> 1. How are NMI's supposed to be delivered to the guest ? I did this by adding a new op to
> kvm_x86_ops.
>   

That's fine. ops are there to be extended.

> 2. How am I supposed to handle perfmon MSRs ? Direct access may pose problems during migration. But 
> am not sure how costly emulation by abstraction would be..
> I have not yet considered saving the MSRS upon vmexits to allow multiple VMs use the MSRs. I think i 
> can do them easily when i get this working.
>   

I don't think there's a sane way to emulate the non-architectural 
perfmon counters. It may be possible to do so for the architectural 
ones, but I'm not sure. So pass-through (with saving and restoring) is 
the only option.

> Here's the code. Please tell me what dumb mistake I am doing.
>
>
> diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
> index c02541e..276048a 100644
> --- a/arch/x86/kernel/cpu/perfctr-watchdog.c
> +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
> @@ -342,7 +342,7 @@ static const struct wd_ops k7_wd_ops = {
>  #define P6_EVNTSEL_INT		(1 << 20)
>  #define P6_EVNTSEL_OS		(1 << 17)
>  #define P6_EVNTSEL_USR		(1 << 16)
> -#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
> +#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x3C
>  #define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
>   

What's this?


>  
>  static int setup_p6_watchdog(unsigned nmi_hz)
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 2cbee94..73e9361 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -25,6 +25,8 @@
>  #include <linux/hrtimer.h>
>  #include <linux/io.h>
>  #include <linux/module.h>
> +#include <linux/kdebug.h>
> +#include <linux/notifier.h>
>  #include <asm/processor.h>
>  #include <asm/msr.h>
>  #include <asm/page.h>
> @@ -740,9 +742,12 @@ static void apic_mmio_write(struct kvm_io_device *this,
>  		apic_set_reg(apic, APIC_ICR2, val & 0xff000000);
>  		break;
>  
> +	case APIC_LVTPC:
> +		/* Enable PC NMI*/
> +		if (val == APIC_DM_NMI)
> +			apic_write(APIC_LVTPC,val);
>   

You're writing to the precious host apic here.

- need to disallow if the host is using it
- need to prevent illegal values
- need to use some sort of perfmon api rather than directly banging on 
the apic

> @@ -790,6 +795,18 @@ static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
>  	return ret;
>  }
>  
> +static int nmi_notify(struct notifier_block *self,unsigned long val, void *data) {
> +
> +        struct kvm *kvm;
> +        kvm  = list_entry(vm_list.next, struct kvm, vm_list);
> +	kvm_x86_ops->inject_nmi(kvm->vcpus[0]);
> +        return NOTIFY_STOP;
> +}
>   

You're not guaranteed to be in vcpu context here, that's what's causing 
the vmwrite errors.

Enabling on guest entry and disabling on guest exit is critical, both 
for accuracy and correctness.




-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] nmi watchdog in kvm
       [not found]     ` <47A054EE.6010408-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2008-01-30 14:22       ` Balaji Rao
       [not found]         ` <200801301952.57604.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Balaji Rao @ 2008-01-30 14:22 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

On Wednesday 30 January 2008 04:13:58 pm Avi Kivity wrote:
> > @@ -790,6 +795,18 @@ static int apic_mmio_range(struct kvm_io_device
> > *this, gpa_t addr) return ret;
> >  }
> >
> > +static int nmi_notify(struct notifier_block *self,unsigned long val,
> > void *data) { +
> > +        struct kvm *kvm;
> > +        kvm  = list_entry(vm_list.next, struct kvm, vm_list);
> > +	kvm_x86_ops->inject_nmi(kvm->vcpus[0]);
> > +        return NOTIFY_STOP;
> > +}
>
> You're not guaranteed to be in vcpu context here, that's what's causing
> the vmwrite errors.
>
I intended to do this here. Looks like its not the right way to check for 
presence in vcpu context. How do i do it ? please explain.

+static void vmx_inject_nmi(struct kvm_vcpu *vcpu) {
+
+       struct vcpu_vmx * vmx = to_vmx(vcpu);
+               if (vmx->launched)
+               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+                       2 | INTR_TYPE_NMI | INTR_INFO_VALID_MASK);
+}

regards,
balaji rao


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] nmi watchdog in kvm
       [not found]         ` <200801301952.57604.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2008-01-30 14:39           ` Avi Kivity
       [not found]             ` <47A08C24.8040505-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Avi Kivity @ 2008-01-30 14:39 UTC (permalink / raw)
  To: Balaji Rao; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Balaji Rao wrote:
> On Wednesday 30 January 2008 04:13:58 pm Avi Kivity wrote:
>   
>>> @@ -790,6 +795,18 @@ static int apic_mmio_range(struct kvm_io_device
>>> *this, gpa_t addr) return ret;
>>>  }
>>>
>>> +static int nmi_notify(struct notifier_block *self,unsigned long val,
>>> void *data) { +
>>> +        struct kvm *kvm;
>>> +        kvm  = list_entry(vm_list.next, struct kvm, vm_list);
>>> +	kvm_x86_ops->inject_nmi(kvm->vcpus[0]);
>>> +        return NOTIFY_STOP;
>>> +}
>>>       
>> You're not guaranteed to be in vcpu context here, that's what's causing
>> the vmwrite errors.
>>
>>     
> I intended to do this here. Looks like its not the right way to check for 
> presence in vcpu context. How do i do it ? please explain.
>
> +static void vmx_inject_nmi(struct kvm_vcpu *vcpu) {
> +
> +       struct vcpu_vmx * vmx = to_vmx(vcpu);
> +               if (vmx->launched)
> +               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
> +                       2 | INTR_TYPE_NMI | INTR_INFO_VALID_MASK);
> +}
>
>   

No, this is the wrong place.  If you have a vcpu, you'd better be in 
vcpu context.

The way to do it is to set a bit in vcpu->requests and force an IPI to 
vcpu->cpu.  You can look at kvm_flush_remote_tlbs() for an example 
(another example is in the lapic IPI code, actually a better one since 
it also wakes the cpu up if it is in hlt state).

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] nmi watchdog in kvm
       [not found]             ` <47A08C24.8040505-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2008-01-30 15:02               ` Balaji Rao
       [not found]                 ` <200801302032.01603.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 11+ messages in thread
From: Balaji Rao @ 2008-01-30 15:02 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

On Wednesday 30 January 2008 08:09:32 pm Avi Kivity wrote:
> >
> > I intended to do this here. Looks like its not the right way to check for
> > presence in vcpu context. How do i do it ? please explain.
> >
> > +static void vmx_inject_nmi(struct kvm_vcpu *vcpu) {
> > +
> > +       struct vcpu_vmx * vmx = to_vmx(vcpu);
> > +               if (vmx->launched)
> > +               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
> > +                       2 | INTR_TYPE_NMI | INTR_INFO_VALID_MASK);
> > +}
>
> No, this is the wrong place.  If you have a vcpu, you'd better be in
> vcpu context.
Oh.. ok. Looks like I have not understood the vcpu concept correctly. Will put 
more thought into it and understand it before I attempt to fix this.

thank you,
balaji rao


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [RFC] nmi watchdog in kvm
       [not found]                 ` <200801302032.01603.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2008-01-30 15:13                   ` Avi Kivity
  0 siblings, 0 replies; 11+ messages in thread
From: Avi Kivity @ 2008-01-30 15:13 UTC (permalink / raw)
  To: Balaji Rao; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Balaji Rao wrote:
> On Wednesday 30 January 2008 08:09:32 pm Avi Kivity wrote:
>   
>>> I intended to do this here. Looks like its not the right way to check for
>>> presence in vcpu context. How do i do it ? please explain.
>>>
>>> +static void vmx_inject_nmi(struct kvm_vcpu *vcpu) {
>>> +
>>> +       struct vcpu_vmx * vmx = to_vmx(vcpu);
>>> +               if (vmx->launched)
>>> +               vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
>>> +                       2 | INTR_TYPE_NMI | INTR_INFO_VALID_MASK);
>>> +}
>>>       
>> No, this is the wrong place.  If you have a vcpu, you'd better be in
>> vcpu context.
>>     
> Oh.. ok. Looks like I have not understood the vcpu concept correctly. Will put 
> more thought into it and understand it before I attempt to fix this.
>
>   

To be clear, vcpu context is the time between vcpu_load() and 
vcpu_put(), holding vcpu->mutex.

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2008.
http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-01-30 15:13 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-01-28  8:18 [RFC] nmi watchdog in kvm Balaji Rao
     [not found] ` <200801281348.41010.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2008-01-28 10:57   ` Ingo Molnar
2008-01-28 12:29   ` Joerg Roedel
     [not found]     ` <20080128122925.GA11803-5C7GfCeVMHo@public.gmane.org>
2008-01-28 14:42       ` Balaji Rao
     [not found]         ` <200801282012.26225.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2008-01-28 15:00           ` Joerg Roedel
     [not found]             ` <20080128150032.GF6960-5C7GfCeVMHo@public.gmane.org>
2008-01-28 16:30               ` Balaji Rao
2008-01-30 10:43   ` Avi Kivity
     [not found]     ` <47A054EE.6010408-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-30 14:22       ` Balaji Rao
     [not found]         ` <200801301952.57604.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2008-01-30 14:39           ` Avi Kivity
     [not found]             ` <47A08C24.8040505-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2008-01-30 15:02               ` Balaji Rao
     [not found]                 ` <200801302032.01603.balajirrao-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2008-01-30 15:13                   ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox