kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC] KVM: Fix simultaneous NMIs
@ 2011-09-15 14:45 Avi Kivity
  2011-09-15 16:01 ` Jan Kiszka
  0 siblings, 1 reply; 14+ messages in thread
From: Avi Kivity @ 2011-09-15 14:45 UTC (permalink / raw)
  To: Marcelo Tosatti, Jan Kiszka, kvm

If simultaneous NMIs happen, we're supposed to queue the second
and next (collapsing them), but currently we sometimes collapse
the second into the first.

Fix by using a counter for pending NMIs instead of a bool; collapsing
happens when the NMI window reopens.

Signed-off-by: Avi Kivity <avi@redhat.com>
---

Not sure whether this interacts correctly with NMI-masked-by-STI or with
save/restore.

 arch/x86/include/asm/kvm_host.h |    2 +-
 arch/x86/kvm/svm.c              |    1 +
 arch/x86/kvm/vmx.c              |    3 ++-
 arch/x86/kvm/x86.c              |   33 +++++++++++++++------------------
 arch/x86/kvm/x86.h              |    7 +++++++
 5 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ab4241..3a95885 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -413,7 +413,7 @@ struct kvm_vcpu_arch {
 	u32  tsc_catchup_mult;
 	s8   tsc_catchup_shift;
 
-	bool nmi_pending;
+	atomic_t nmi_pending;
 	bool nmi_injected;
 
 	struct mtrr_state_type mtrr_state;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index e7ed4b1..d4c792f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3609,6 +3609,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
 	if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
 	    && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
 		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
+		kvm_collapse_pending_nmis(&svm->vcpu);
 		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 	}
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index a0d6bd9..745dadb 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4761,6 +4761,7 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
 	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 	++vcpu->stat.nmi_window_exits;
+	kvm_collapse_pending_nmis(vcpu);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
 	return 1;
@@ -5790,7 +5791,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 		if (vmx_interrupt_allowed(vcpu)) {
 			vmx->soft_vnmi_blocked = 0;
 		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
-			   vcpu->arch.nmi_pending) {
+			   atomic_read(&vcpu->arch.nmi_pending)) {
 			/*
 			 * This CPU don't support us in finding the end of an
 			 * NMI-blocked window if the guest runs with IRQs
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6b37f18..d4f45e0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -359,8 +359,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 
 void kvm_inject_nmi(struct kvm_vcpu *vcpu)
 {
+	atomic_inc(&vcpu->arch.nmi_pending);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
-	vcpu->arch.nmi_pending = 1;
 }
 EXPORT_SYMBOL_GPL(kvm_inject_nmi);
 
@@ -2844,7 +2844,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 			KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
 
 	events->nmi.injected = vcpu->arch.nmi_injected;
-	events->nmi.pending = vcpu->arch.nmi_pending;
+	events->nmi.pending = atomic_read(&vcpu->arch.nmi_pending) != 0;
 	events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
 	events->nmi.pad = 0;
 
@@ -2878,7 +2878,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 
 	vcpu->arch.nmi_injected = events->nmi.injected;
 	if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
-		vcpu->arch.nmi_pending = events->nmi.pending;
+		atomic_set(&vcpu->arch.nmi_pending, events->nmi.pending);
 	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
 
 	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
@@ -4763,7 +4763,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 	kvm_set_rflags(vcpu, ctxt->eflags);
 
 	if (irq == NMI_VECTOR)
-		vcpu->arch.nmi_pending = false;
+		atomic_set(&vcpu->arch.nmi_pending, 0);
 	else
 		vcpu->arch.interrupt.pending = false;
 
@@ -5570,9 +5570,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
 	}
 
 	/* try to inject new event if pending */
-	if (vcpu->arch.nmi_pending) {
+	if (atomic_read(&vcpu->arch.nmi_pending)) {
 		if (kvm_x86_ops->nmi_allowed(vcpu)) {
-			vcpu->arch.nmi_pending = false;
+			atomic_dec(&vcpu->arch.nmi_pending);
 			vcpu->arch.nmi_injected = true;
 			kvm_x86_ops->set_nmi(vcpu);
 		}
@@ -5604,10 +5604,14 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
 	}
 }
 
+static bool nmi_in_progress(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.nmi_injected || kvm_x86_ops->get_nmi_mask(vcpu);
+}
+
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
-	bool nmi_pending;
 	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
 		vcpu->run->request_interrupt_window;
 
@@ -5654,19 +5658,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	if (unlikely(r))
 		goto out;
 
-	/*
-	 * An NMI can be injected between local nmi_pending read and
-	 * vcpu->arch.nmi_pending read inside inject_pending_event().
-	 * But in that case, KVM_REQ_EVENT will be set, which makes
-	 * the race described above benign.
-	 */
-	nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
-
 	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
 		inject_pending_event(vcpu);
 
 		/* enable NMI/IRQ window open exits if needed */
-		if (nmi_pending)
+		if (atomic_read(&vcpu->arch.nmi_pending)
+		    && nmi_in_progress(vcpu))
 			kvm_x86_ops->enable_nmi_window(vcpu);
 		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
 			kvm_x86_ops->enable_irq_window(vcpu);
@@ -6374,7 +6371,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 
 int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.nmi_pending = false;
+	atomic_set(&vcpu->arch.nmi_pending, 0);
 	vcpu->arch.nmi_injected = false;
 
 	vcpu->arch.switch_db_regs = 0;
@@ -6649,7 +6646,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 		!vcpu->arch.apf.halted)
 		|| !list_empty_careful(&vcpu->async_pf.done)
 		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
-		|| vcpu->arch.nmi_pending ||
+		|| atomic_read(&vcpu->arch.nmi_pending) ||
 		(kvm_arch_interrupt_allowed(vcpu) &&
 		 kvm_cpu_has_interrupt(vcpu));
 }
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index d36fe23..ed04e2b 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -125,4 +125,11 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 	gva_t addr, void *val, unsigned int bytes,
 	struct x86_exception *exception);
 
+static inline void kvm_collapse_pending_nmis(struct kvm_vcpu *vcpu)
+{
+	/* Collapse all NMIs queued while an NMI handler was running to one */
+	if (atomic_read(&vcpu->arch.nmi_pending))
+		atomic_set(&vcpu->arch.nmi_pending, 1);
+}
+
 #endif
-- 
1.7.6.3


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-15 14:45 [RFC] KVM: Fix simultaneous NMIs Avi Kivity
@ 2011-09-15 16:01 ` Jan Kiszka
  2011-09-15 17:02   ` Avi Kivity
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Kiszka @ 2011-09-15 16:01 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm@vger.kernel.org

On 2011-09-15 16:45, Avi Kivity wrote:
> If simultaneous NMIs happen, we're supposed to queue the second
> and next (collapsing them), but currently we sometimes collapse
> the second into the first.

Can you describe the race in a few more details here ("sometimes" sounds
like "I don't know when" :) )?

> 
> Fix by using a counter for pending NMIs instead of a bool; collapsing
> happens when the NMI window reopens.
> 
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
> 
> Not sure whether this interacts correctly with NMI-masked-by-STI or with
> save/restore.
> 
>  arch/x86/include/asm/kvm_host.h |    2 +-
>  arch/x86/kvm/svm.c              |    1 +
>  arch/x86/kvm/vmx.c              |    3 ++-
>  arch/x86/kvm/x86.c              |   33 +++++++++++++++------------------
>  arch/x86/kvm/x86.h              |    7 +++++++
>  5 files changed, 26 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6ab4241..3a95885 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -413,7 +413,7 @@ struct kvm_vcpu_arch {
>  	u32  tsc_catchup_mult;
>  	s8   tsc_catchup_shift;
>  
> -	bool nmi_pending;
> +	atomic_t nmi_pending;
>  	bool nmi_injected;
>  
>  	struct mtrr_state_type mtrr_state;
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index e7ed4b1..d4c792f 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3609,6 +3609,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
>  	if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
>  	    && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
>  		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
> +		kvm_collapse_pending_nmis(&svm->vcpu);
>  		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
>  	}
>  
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a0d6bd9..745dadb 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4761,6 +4761,7 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
>  	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
>  	++vcpu->stat.nmi_window_exits;
> +	kvm_collapse_pending_nmis(vcpu);
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
>  
>  	return 1;
> @@ -5790,7 +5791,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>  		if (vmx_interrupt_allowed(vcpu)) {
>  			vmx->soft_vnmi_blocked = 0;
>  		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
> -			   vcpu->arch.nmi_pending) {
> +			   atomic_read(&vcpu->arch.nmi_pending)) {
>  			/*
>  			 * This CPU don't support us in finding the end of an
>  			 * NMI-blocked window if the guest runs with IRQs
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6b37f18..d4f45e0 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -359,8 +359,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
>  
>  void kvm_inject_nmi(struct kvm_vcpu *vcpu)
>  {
> +	atomic_inc(&vcpu->arch.nmi_pending);
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
> -	vcpu->arch.nmi_pending = 1;

Does the reordering matter? Do we need barriers?

>  }
>  EXPORT_SYMBOL_GPL(kvm_inject_nmi);
>  
> @@ -2844,7 +2844,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
>  			KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
>  
>  	events->nmi.injected = vcpu->arch.nmi_injected;
> -	events->nmi.pending = vcpu->arch.nmi_pending;
> +	events->nmi.pending = atomic_read(&vcpu->arch.nmi_pending) != 0;
>  	events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
>  	events->nmi.pad = 0;
>  
> @@ -2878,7 +2878,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
>  
>  	vcpu->arch.nmi_injected = events->nmi.injected;
>  	if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
> -		vcpu->arch.nmi_pending = events->nmi.pending;
> +		atomic_set(&vcpu->arch.nmi_pending, events->nmi.pending);
>  	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
>  
>  	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
> @@ -4763,7 +4763,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
>  	kvm_set_rflags(vcpu, ctxt->eflags);
>  
>  	if (irq == NMI_VECTOR)
> -		vcpu->arch.nmi_pending = false;
> +		atomic_set(&vcpu->arch.nmi_pending, 0);
>  	else
>  		vcpu->arch.interrupt.pending = false;
>  
> @@ -5570,9 +5570,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
>  	}
>  
>  	/* try to inject new event if pending */
> -	if (vcpu->arch.nmi_pending) {
> +	if (atomic_read(&vcpu->arch.nmi_pending)) {
>  		if (kvm_x86_ops->nmi_allowed(vcpu)) {
> -			vcpu->arch.nmi_pending = false;
> +			atomic_dec(&vcpu->arch.nmi_pending);

Here we lost NMIs in the past by overwriting nmi_pending while another
one was already queued, right?

>  			vcpu->arch.nmi_injected = true;
>  			kvm_x86_ops->set_nmi(vcpu);
>  		}
> @@ -5604,10 +5604,14 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
>  	}
>  }
>  
> +static bool nmi_in_progress(struct kvm_vcpu *vcpu)
> +{
> +	return vcpu->arch.nmi_injected || kvm_x86_ops->get_nmi_mask(vcpu);
> +}
> +
>  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  {
>  	int r;
> -	bool nmi_pending;
>  	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
>  		vcpu->run->request_interrupt_window;
>  
> @@ -5654,19 +5658,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	if (unlikely(r))
>  		goto out;
>  
> -	/*
> -	 * An NMI can be injected between local nmi_pending read and
> -	 * vcpu->arch.nmi_pending read inside inject_pending_event().
> -	 * But in that case, KVM_REQ_EVENT will be set, which makes
> -	 * the race described above benign.
> -	 */
> -	nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
> -
>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>  		inject_pending_event(vcpu);
>  
>  		/* enable NMI/IRQ window open exits if needed */
> -		if (nmi_pending)
> +		if (atomic_read(&vcpu->arch.nmi_pending)
> +		    && nmi_in_progress(vcpu))

Is nmi_pending && !nmi_in_progress possible at all? Is it rather a BUG
condition? If not, what will happen next?

>  			kvm_x86_ops->enable_nmi_window(vcpu);
>  		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
>  			kvm_x86_ops->enable_irq_window(vcpu);
> @@ -6374,7 +6371,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>  
>  int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
>  {
> -	vcpu->arch.nmi_pending = false;
> +	atomic_set(&vcpu->arch.nmi_pending, 0);
>  	vcpu->arch.nmi_injected = false;
>  
>  	vcpu->arch.switch_db_regs = 0;
> @@ -6649,7 +6646,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
>  		!vcpu->arch.apf.halted)
>  		|| !list_empty_careful(&vcpu->async_pf.done)
>  		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
> -		|| vcpu->arch.nmi_pending ||
> +		|| atomic_read(&vcpu->arch.nmi_pending) ||
>  		(kvm_arch_interrupt_allowed(vcpu) &&
>  		 kvm_cpu_has_interrupt(vcpu));
>  }
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index d36fe23..ed04e2b 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -125,4 +125,11 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
>  	gva_t addr, void *val, unsigned int bytes,
>  	struct x86_exception *exception);
>  
> +static inline void kvm_collapse_pending_nmis(struct kvm_vcpu *vcpu)
> +{
> +	/* Collapse all NMIs queued while an NMI handler was running to one */
> +	if (atomic_read(&vcpu->arch.nmi_pending))
> +		atomic_set(&vcpu->arch.nmi_pending, 1);

Is it OK that NMIs injected after the collapse will increment this to >
1 again? Or is that impossible?

> +}
> +
>  #endif

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-15 16:01 ` Jan Kiszka
@ 2011-09-15 17:02   ` Avi Kivity
  2011-09-15 17:25     ` Jan Kiszka
  0 siblings, 1 reply; 14+ messages in thread
From: Avi Kivity @ 2011-09-15 17:02 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm@vger.kernel.org

On 09/15/2011 07:01 PM, Jan Kiszka wrote:
> On 2011-09-15 16:45, Avi Kivity wrote:
> >  If simultaneous NMIs happen, we're supposed to queue the second
> >  and next (collapsing them), but currently we sometimes collapse
> >  the second into the first.
>
> Can you describe the race in a few more details here ("sometimes" sounds
> like "I don't know when" :) )?

In this case it was "I'm in a hurry".

> >
> >   void kvm_inject_nmi(struct kvm_vcpu *vcpu)
> >   {
> >  +	atomic_inc(&vcpu->arch.nmi_pending);
> >   	kvm_make_request(KVM_REQ_EVENT, vcpu);
> >  -	vcpu->arch.nmi_pending = 1;
>
> Does the reordering matter?

I think so.  Suppose the vcpu enters just after kvm_make_request(); it 
sees KVM_REQ_EVENT and clears it, but doesn't see nmi_pending because it 
wasn't set set.  Then comes a kick, the guest is reentered with 
nmi_pending set but KVM_REQ_EVENT clear and sails through the check and 
enters the guest.  The NMI is delayed until the next KVM_REQ_EVENT.

> Do we need barriers?

Yes.

>
> >  @@ -5570,9 +5570,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
> >   	}
> >
> >   	/* try to inject new event if pending */
> >  -	if (vcpu->arch.nmi_pending) {
> >  +	if (atomic_read(&vcpu->arch.nmi_pending)) {
> >   		if (kvm_x86_ops->nmi_allowed(vcpu)) {
> >  -			vcpu->arch.nmi_pending = false;
> >  +			atomic_dec(&vcpu->arch.nmi_pending);
>
> Here we lost NMIs in the past by overwriting nmi_pending while another
> one was already queued, right?

One place, yes.  The other is kvm_inject_nmi() - if the first nmi didn't 
get picked up by the vcpu by the time the second nmi arrives, we lose 
the second nmi.

> >   	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> >   		inject_pending_event(vcpu);
> >
> >   		/* enable NMI/IRQ window open exits if needed */
> >  -		if (nmi_pending)
> >  +		if (atomic_read(&vcpu->arch.nmi_pending)
> >  +		&&  nmi_in_progress(vcpu))
>
> Is nmi_pending&&  !nmi_in_progress possible at all?

Yes, due to NMI-blocked-by-STI.  A really touchy area.

> Is it rather a BUG
> condition?

No.

> If not, what will happen next?

The NMI window will open and we'll inject the NMI.  But I think we have 
a bug here - we should only kvm_collapse_nmis() if an NMI handler was 
indeed running, yet we do it unconditionally.

> >
> >  +static inline void kvm_collapse_pending_nmis(struct kvm_vcpu *vcpu)
> >  +{
> >  +	/* Collapse all NMIs queued while an NMI handler was running to one */
> >  +	if (atomic_read(&vcpu->arch.nmi_pending))
> >  +		atomic_set(&vcpu->arch.nmi_pending, 1);
>
> Is it OK that NMIs injected after the collapse will increment this to>
> 1 again? Or is that impossible?
>

It's possible and okay.  We're now completing execution of IRET.  Doing 
atomic_set() after atomic_inc() means the NMI happened before IRET 
completed, and vice versa.  Since these events are asynchronous, we're 
free to choose one or the other (a self-IPI-NMI just before the IRET 
must be swallowed, and a self-IPI-NMI just after the IRET would only be 
executed after the next time around the handler).


-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-15 17:02   ` Avi Kivity
@ 2011-09-15 17:25     ` Jan Kiszka
  2011-09-15 17:48       ` Avi Kivity
  0 siblings, 1 reply; 14+ messages in thread
From: Jan Kiszka @ 2011-09-15 17:25 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Marcelo Tosatti, kvm@vger.kernel.org

On 2011-09-15 19:02, Avi Kivity wrote:
> On 09/15/2011 07:01 PM, Jan Kiszka wrote:
>> On 2011-09-15 16:45, Avi Kivity wrote:
>>>  If simultaneous NMIs happen, we're supposed to queue the second
>>>  and next (collapsing them), but currently we sometimes collapse
>>>  the second into the first.
>>
>> Can you describe the race in a few more details here ("sometimes" sounds
>> like "I don't know when" :) )?
> 
> In this case it was "I'm in a hurry".
> 
>>>
>>>   void kvm_inject_nmi(struct kvm_vcpu *vcpu)
>>>   {
>>>  +	atomic_inc(&vcpu->arch.nmi_pending);
>>>   	kvm_make_request(KVM_REQ_EVENT, vcpu);
>>>  -	vcpu->arch.nmi_pending = 1;
>>
>> Does the reordering matter?
> 
> I think so.  Suppose the vcpu enters just after kvm_make_request(); it 
> sees KVM_REQ_EVENT and clears it, but doesn't see nmi_pending because it 
> wasn't set set.  Then comes a kick, the guest is reentered with 
> nmi_pending set but KVM_REQ_EVENT clear and sails through the check and 
> enters the guest.  The NMI is delayed until the next KVM_REQ_EVENT.

That makes sense - and the old code looks more strange now.

> 
>> Do we need barriers?
> 
> Yes.
> 
>>
>>>  @@ -5570,9 +5570,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
>>>   	}
>>>
>>>   	/* try to inject new event if pending */
>>>  -	if (vcpu->arch.nmi_pending) {
>>>  +	if (atomic_read(&vcpu->arch.nmi_pending)) {
>>>   		if (kvm_x86_ops->nmi_allowed(vcpu)) {
>>>  -			vcpu->arch.nmi_pending = false;
>>>  +			atomic_dec(&vcpu->arch.nmi_pending);
>>
>> Here we lost NMIs in the past by overwriting nmi_pending while another
>> one was already queued, right?
> 
> One place, yes.  The other is kvm_inject_nmi() - if the first nmi didn't 
> get picked up by the vcpu by the time the second nmi arrives, we lose 
> the second nmi.

Thinking this through again, it's actually not yet clear to me what we
are modeling here: If two NMI events arrive almost perfectly in
parallel, does the real hardware guarantee that they will always cause
two NMI events in the CPU? Then this is required.

Otherwise I just lost understanding again why we were losing NMIs here
and in kvm_inject_nmi (maybe elsewhere then?).

> 
>>>   	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>>>   		inject_pending_event(vcpu);
>>>
>>>   		/* enable NMI/IRQ window open exits if needed */
>>>  -		if (nmi_pending)
>>>  +		if (atomic_read(&vcpu->arch.nmi_pending)
>>>  +		&&  nmi_in_progress(vcpu))
>>
>> Is nmi_pending&&  !nmi_in_progress possible at all?
> 
> Yes, due to NMI-blocked-by-STI.  A really touchy area.

And we don't need the window exit notification then? I don't understand
what nmi_in_progress is supposed to do here.

> 
>> Is it rather a BUG
>> condition?
> 
> No.
> 
>> If not, what will happen next?
> 
> The NMI window will open and we'll inject the NMI.

How will we know this? We do not request the exit, that's my worry.

>  But I think we have 
> a bug here - we should only kvm_collapse_nmis() if an NMI handler was 
> indeed running, yet we do it unconditionally.
> 
>>>
>>>  +static inline void kvm_collapse_pending_nmis(struct kvm_vcpu *vcpu)
>>>  +{
>>>  +	/* Collapse all NMIs queued while an NMI handler was running to one */
>>>  +	if (atomic_read(&vcpu->arch.nmi_pending))
>>>  +		atomic_set(&vcpu->arch.nmi_pending, 1);
>>
>> Is it OK that NMIs injected after the collapse will increment this to>
>> 1 again? Or is that impossible?
>>
> 
> It's possible and okay.  We're now completing execution of IRET.  Doing 
> atomic_set() after atomic_inc() means the NMI happened before IRET 
> completed, and vice versa.  Since these events are asynchronous, we're 
> free to choose one or the other (a self-IPI-NMI just before the IRET 
> must be swallowed, and a self-IPI-NMI just after the IRET would only be 
> executed after the next time around the handler).

Need to think through this separately.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-15 17:25     ` Jan Kiszka
@ 2011-09-15 17:48       ` Avi Kivity
  2011-09-19 13:54         ` Marcelo Tosatti
  0 siblings, 1 reply; 14+ messages in thread
From: Avi Kivity @ 2011-09-15 17:48 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Marcelo Tosatti, kvm@vger.kernel.org

On 09/15/2011 08:25 PM, Jan Kiszka wrote:
> >
> >  I think so.  Suppose the vcpu enters just after kvm_make_request(); it
> >  sees KVM_REQ_EVENT and clears it, but doesn't see nmi_pending because it
> >  wasn't set set.  Then comes a kick, the guest is reentered with
> >  nmi_pending set but KVM_REQ_EVENT clear and sails through the check and
> >  enters the guest.  The NMI is delayed until the next KVM_REQ_EVENT.
>
> That makes sense - and the old code looks more strange now.

I think it dates to the time all NMIs were synchronous.

> >>>
> >>>    	/* try to inject new event if pending */
> >>>   -	if (vcpu->arch.nmi_pending) {
> >>>   +	if (atomic_read(&vcpu->arch.nmi_pending)) {
> >>>    		if (kvm_x86_ops->nmi_allowed(vcpu)) {
> >>>   -			vcpu->arch.nmi_pending = false;
> >>>   +			atomic_dec(&vcpu->arch.nmi_pending);
> >>
> >>  Here we lost NMIs in the past by overwriting nmi_pending while another
> >>  one was already queued, right?
> >
> >  One place, yes.  The other is kvm_inject_nmi() - if the first nmi didn't
> >  get picked up by the vcpu by the time the second nmi arrives, we lose
> >  the second nmi.
>
> Thinking this through again, it's actually not yet clear to me what we
> are modeling here: If two NMI events arrive almost perfectly in
> parallel, does the real hardware guarantee that they will always cause
> two NMI events in the CPU? Then this is required.

It's not 100% clear from the SDM, but this is what I understood from 
it.  And it's needed - the NMI handlers are now being reworked to handle 
just one NMI source (hopefully the cheapest) in the handler, and if we 
detect a back-to-back NMI, handle all possible NMI sources.  This 
optimization is needed in turn so we can use Jeremy's paravirt spinlock 
framework, which requires a sleep primitive and a 
wake-up-even-if-the-sleeper-has-interrupts-disabled primitive.  i 
thought of using HLT and NMIs respectively, but that means we need a 
cheap handler (i.e. don't go reading PMU MSRs).

> Otherwise I just lost understanding again why we were loosing NMIs here
> and in kvm_inject_nmi (maybe elsewhere then?).

Because of that.

> >
> >>>    	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> >>>    		inject_pending_event(vcpu);
> >>>
> >>>    		/* enable NMI/IRQ window open exits if needed */
> >>>   -		if (nmi_pending)
> >>>   +		if (atomic_read(&vcpu->arch.nmi_pending)
> >>>   +		&&   nmi_in_progress(vcpu))
> >>
> >>  Is nmi_pending&&   !nmi_in_progress possible at all?
> >
> >  Yes, due to NMI-blocked-by-STI.  A really touchy area.
>
> And we don't need the window exit notification then? I don't understand
> what nmi_in_progress is supposed to do here.

We need the window notification in both cases.  If we're recovering from 
STI, then we don't need to collapse NMIs.  If we're completing an NMI 
handler, then we do need to collapse NMIs (since the queue length is 
two, and we just completed one).

> >
> >>  If not, what will happen next?
> >
> >  The NMI window will open and we'll inject the NMI.
>
> How will we know this? We do not request the exit, that's my worry.

I think we do? Oh, but this patch breaks it.


-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-15 17:48       ` Avi Kivity
@ 2011-09-19 13:54         ` Marcelo Tosatti
  2011-09-19 14:30           ` Avi Kivity
  0 siblings, 1 reply; 14+ messages in thread
From: Marcelo Tosatti @ 2011-09-19 13:54 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jan Kiszka, kvm@vger.kernel.org

On Thu, Sep 15, 2011 at 08:48:58PM +0300, Avi Kivity wrote:
> On 09/15/2011 08:25 PM, Jan Kiszka wrote:
> >>
> >>  I think so.  Suppose the vcpu enters just after kvm_make_request(); it
> >>  sees KVM_REQ_EVENT and clears it, but doesn't see nmi_pending because it
> >>  wasn't set set.  Then comes a kick, the guest is reentered with
> >>  nmi_pending set but KVM_REQ_EVENT clear and sails through the check and
> >>  enters the guest.  The NMI is delayed until the next KVM_REQ_EVENT.
> >
> >That makes sense - and the old code looks more strange now.
> 
> I think it dates to the time all NMIs were synchronous.
> 
> >>>>
> >>>>    	/* try to inject new event if pending */
> >>>>   -	if (vcpu->arch.nmi_pending) {
> >>>>   +	if (atomic_read(&vcpu->arch.nmi_pending)) {
> >>>>    		if (kvm_x86_ops->nmi_allowed(vcpu)) {
> >>>>   -			vcpu->arch.nmi_pending = false;
> >>>>   +			atomic_dec(&vcpu->arch.nmi_pending);
> >>>
> >>>  Here we lost NMIs in the past by overwriting nmi_pending while another
> >>>  one was already queued, right?
> >>
> >>  One place, yes.  The other is kvm_inject_nmi() - if the first nmi didn't
> >>  get picked up by the vcpu by the time the second nmi arrives, we lose
> >>  the second nmi.
> >
> >Thinking this through again, it's actually not yet clear to me what we
> >are modeling here: If two NMI events arrive almost perfectly in
> >parallel, does the real hardware guarantee that they will always cause
> >two NMI events in the CPU? Then this is required.
> 
> It's not 100% clear from the SDM, but this is what I understood from
> it.  And it's needed - the NMI handlers are now being reworked to
> handle just one NMI source (hopefully the cheapest) in the handler,
> and if we detect a back-to-back NMI, handle all possible NMI
> sources.  This optimization is needed in turn so we can use Jeremy's
> paravirt spinlock framework, which requires a sleep primitive and a
> wake-up-even-if-the-sleeper-has-interrupts-disabled primitive.  i
> thought of using HLT and NMIs respectively, but that means we need a
> cheap handler (i.e. don't go reading PMU MSRs).
> 
> >Otherwise I just lost understanding again why we were loosing NMIs here
> >and in kvm_inject_nmi (maybe elsewhere then?).
> 
> Because of that.
> 
> >>
> >>>>    	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
> >>>>    		inject_pending_event(vcpu);
> >>>>
> >>>>    		/* enable NMI/IRQ window open exits if needed */
> >>>>   -		if (nmi_pending)
> >>>>   +		if (atomic_read(&vcpu->arch.nmi_pending)
> >>>>   +		&&   nmi_in_progress(vcpu))
> >>>
> >>>  Is nmi_pending&&   !nmi_in_progress possible at all?
> >>
> >>  Yes, due to NMI-blocked-by-STI.  A really touchy area.
> >And we don't need the window exit notification then? I don't understand
> >what nmi_in_progress is supposed to do here.
> 
> We need the window notification in both cases.  If we're recovering
> from STI, then we don't need to collapse NMIs.  If we're completing
> an NMI handler, then we do need to collapse NMIs (since the queue
> length is two, and we just completed one).

I don't understand what is the point with nmi_in_progress, and the above
hunk, either. Can't inject_nmi do:

if (nmi_injected + atomic_read(nmi_pending) < 2)
    atomic_inc(nmi_pending)

Instead of collapsing somewhere else? You'd also have to change
nmi_injected handling in arch code so its value is not "hidden", in
complete_interrupts().

> >>
> >>>  If not, what will happen next?
> >>
> >>  The NMI window will open and we'll inject the NMI.
> >
> >How will we know this? We do not request the exit, that's my worry.
> 
> I think we do? Oh, but this patch breaks it.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-19 13:54         ` Marcelo Tosatti
@ 2011-09-19 14:30           ` Avi Kivity
  2011-09-19 14:54             ` Marcelo Tosatti
  0 siblings, 1 reply; 14+ messages in thread
From: Avi Kivity @ 2011-09-19 14:30 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Jan Kiszka, kvm@vger.kernel.org

On 09/19/2011 04:54 PM, Marcelo Tosatti wrote:
> >  >>
> >  >>   Yes, due to NMI-blocked-by-STI.  A really touchy area.
> >  >And we don't need the window exit notification then? I don't understand
> >  >what nmi_in_progress is supposed to do here.
> >
> >  We need the window notification in both cases.  If we're recovering
> >  from STI, then we don't need to collapse NMIs.  If we're completing
> >  an NMI handler, then we do need to collapse NMIs (since the queue
> >  length is two, and we just completed one).
>
> I don't understand what is the point with nmi_in_progress, and the above
> hunk, either. Can't inject_nmi do:
>
> if (nmi_injected + atomic_read(nmi_pending)<  2)
>      atomic_inc(nmi_pending)
>
> Instead of collapsing somewhere else?

We could.  It's not atomic though - two threads executing in parallel 
could raise the value to three.  Could do a cmpxchg loop that does an 
increment bounded to two.  I guess this is a lot clearer, thanks.

> You'd also have to change
> nmi_injected handling in arch code so its value is not "hidden", in
> complete_interrupts().

Or maybe make raising nmi_injected not decrement nmi_pending.  So:

   nmi_pending: total number of interrupts in queue
   nmi_injected: of these, how many are currently being injected

yes?

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-19 14:30           ` Avi Kivity
@ 2011-09-19 14:54             ` Marcelo Tosatti
  2011-09-19 15:09               ` Avi Kivity
  0 siblings, 1 reply; 14+ messages in thread
From: Marcelo Tosatti @ 2011-09-19 14:54 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jan Kiszka, kvm@vger.kernel.org

On Mon, Sep 19, 2011 at 05:30:27PM +0300, Avi Kivity wrote:
> On 09/19/2011 04:54 PM, Marcelo Tosatti wrote:
> >>  >>
> >>  >>   Yes, due to NMI-blocked-by-STI.  A really touchy area.
> >>  >And we don't need the window exit notification then? I don't understand
> >>  >what nmi_in_progress is supposed to do here.
> >>
> >>  We need the window notification in both cases.  If we're recovering
> >>  from STI, then we don't need to collapse NMIs.  If we're completing
> >>  an NMI handler, then we do need to collapse NMIs (since the queue
> >>  length is two, and we just completed one).
> >
> >I don't understand what is the point with nmi_in_progress, and the above
> >hunk, either. Can't inject_nmi do:
> >
> >if (nmi_injected + atomic_read(nmi_pending)<  2)
> >     atomic_inc(nmi_pending)
> >
> >Instead of collapsing somewhere else?
> 
> We could.  It's not atomic though - two threads executing in
> parallel could raise the value to three.  Could do a cmpxchg loop
> does an increment bounded to two.  I guess this is a lot clearer,
> thanks.
> 
> >You'd also have to change
> >nmi_injected handling in arch code so its value is not "hidden", in
> >complete_interrupts().
> 
> Or maybe make raising nmi_injected not decrement nmi_pending.  So:
> 
>   nmi_pending: total number of interrupts in queue
>   nmi_injected: of these, how many are currently being injected
> 
> yes?

Yes, at the expense of decrementing on subarch code (which is fine,
apparently).


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-19 14:54             ` Marcelo Tosatti
@ 2011-09-19 15:09               ` Avi Kivity
  2011-09-19 15:12                 ` Avi Kivity
  2011-09-19 15:22                 ` Marcelo Tosatti
  0 siblings, 2 replies; 14+ messages in thread
From: Avi Kivity @ 2011-09-19 15:09 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Jan Kiszka, kvm@vger.kernel.org

On 09/19/2011 05:54 PM, Marcelo Tosatti wrote:
> On Mon, Sep 19, 2011 at 05:30:27PM +0300, Avi Kivity wrote:
> >  On 09/19/2011 04:54 PM, Marcelo Tosatti wrote:
> >  >>   >>
> >  >>   >>    Yes, due to NMI-blocked-by-STI.  A really touchy area.
> >  >>   >And we don't need the window exit notification then? I don't understand
> >  >>   >what nmi_in_progress is supposed to do here.
> >  >>
> >  >>   We need the window notification in both cases.  If we're recovering
> >  >>   from STI, then we don't need to collapse NMIs.  If we're completing
> >  >>   an NMI handler, then we do need to collapse NMIs (since the queue
> >  >>   length is two, and we just completed one).
> >  >
> >  >I don't understand what is the point with nmi_in_progress, and the above
> >  >hunk, either. Can't inject_nmi do:
> >  >
> >  >if (nmi_injected + atomic_read(nmi_pending)<   2)
> >  >      atomic_inc(nmi_pending)
> >  >
> >  >Instead of collapsing somewhere else?
> >
> >  We could.  It's not atomic though - two threads executing in
> >  parallel could raise the value to three.  Could do a cmpxchg loop
> >  does an increment bounded to two.  I guess this is a lot clearer,
> >  thanks.
> >
> >  >You'd also have to change
> >  >nmi_injected handling in arch code so its value is not "hidden", in
> >  >complete_interrupts().
> >
> >  Or maybe make raising nmi_injected not decrement nmi_pending.  So:
> >
> >    nmi_pending: total number of interrupts in queue
> >    nmi_injected: of these, how many are currently being injected
> >
> >  yes?
>
> Yes, at the expense of decrementing on subarch code (which is fine,
> apparently).
>

Hm, we have no place to decrement.  We need to do that when IRET 
executes, but we don't want to request an NMI window exit in the common 
case of nmi_pending = 1.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-19 15:09               ` Avi Kivity
@ 2011-09-19 15:12                 ` Avi Kivity
  2011-09-19 15:22                 ` Marcelo Tosatti
  1 sibling, 0 replies; 14+ messages in thread
From: Avi Kivity @ 2011-09-19 15:12 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Jan Kiszka, kvm@vger.kernel.org

On 09/19/2011 06:09 PM, Avi Kivity wrote:
> On 09/19/2011 05:54 PM, Marcelo Tosatti wrote:
>> On Mon, Sep 19, 2011 at 05:30:27PM +0300, Avi Kivity wrote:
>> >  On 09/19/2011 04:54 PM, Marcelo Tosatti wrote:
>> > >> >>
>> > >> >>    Yes, due to NMI-blocked-by-STI.  A really touchy area.
>> > >> >And we don't need the window exit notification then? I don't 
>> understand
>> > >> >what nmi_in_progress is supposed to do here.
>> > >>
>> > >>   We need the window notification in both cases.  If we're 
>> recovering
>> > >>   from STI, then we don't need to collapse NMIs.  If we're 
>> completing
>> > >>   an NMI handler, then we do need to collapse NMIs (since the queue
>> > >>   length is two, and we just completed one).
>> > >
>> > >I don't understand what is the point with nmi_in_progress, and the 
>> above
>> > >hunk, either. Can't inject_nmi do:
>> > >
>> > >if (nmi_injected + atomic_read(nmi_pending)<   2)
>> > >      atomic_inc(nmi_pending)
>> > >
>> > >Instead of collapsing somewhere else?
>> >
>> >  We could.  It's not atomic though - two threads executing in
>> >  parallel could raise the value to three.  Could do a cmpxchg loop
>> >  does an increment bounded to two.  I guess this is a lot clearer,
>> >  thanks.
>> >
>> > >You'd also have to change
>> > >nmi_injected handling in arch code so its value is not "hidden", in
>> > >complete_interrupts().
>> >
>> >  Or maybe make raising nmi_injected not decrement nmi_pending.  So:
>> >
>> >    nmi_pending: total number of interrupts in queue
>> >    nmi_injected: of these, how many are currently being injected
>> >
>> >  yes?
>>
>> Yes, at the expense of decrementing on subarch code (which is fine,
>> apparently).
>>
>
> Hm, we have no place to decrement.  We need to do that when IRET 
> executes, but we don't want to request an NMI window exit in the 
> common case of nmi_pending = 1.
>

I guess we have to change kvm_inject_nmi to run in vcpu context, where 
it has access to more stuff.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-19 15:09               ` Avi Kivity
  2011-09-19 15:12                 ` Avi Kivity
@ 2011-09-19 15:22                 ` Marcelo Tosatti
  2011-09-19 15:37                   ` Avi Kivity
  1 sibling, 1 reply; 14+ messages in thread
From: Marcelo Tosatti @ 2011-09-19 15:22 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jan Kiszka, kvm@vger.kernel.org

On Mon, Sep 19, 2011 at 06:09:39PM +0300, Avi Kivity wrote:
> On 09/19/2011 05:54 PM, Marcelo Tosatti wrote:
> >On Mon, Sep 19, 2011 at 05:30:27PM +0300, Avi Kivity wrote:
> >>  On 09/19/2011 04:54 PM, Marcelo Tosatti wrote:
> >>  >>   >>
> >>  >>   >>    Yes, due to NMI-blocked-by-STI.  A really touchy area.
> >>  >>   >And we don't need the window exit notification then? I don't understand
> >>  >>   >what nmi_in_progress is supposed to do here.
> >>  >>
> >>  >>   We need the window notification in both cases.  If we're recovering
> >>  >>   from STI, then we don't need to collapse NMIs.  If we're completing
> >>  >>   an NMI handler, then we do need to collapse NMIs (since the queue
> >>  >>   length is two, and we just completed one).
> >>  >
> >>  >I don't understand what is the point with nmi_in_progress, and the above
> >>  >hunk, either. Can't inject_nmi do:
> >>  >
> >>  >if (nmi_injected + atomic_read(nmi_pending)<   2)
> >>  >      atomic_inc(nmi_pending)
> >>  >
> >>  >Instead of collapsing somewhere else?
> >>
> >>  We could.  It's not atomic though - two threads executing in
> >>  parallel could raise the value to three.  Could do a cmpxchg loop
> >>  does an increment bounded to two.  I guess this is a lot clearer,
> >>  thanks.
> >>
> >>  >You'd also have to change
> >>  >nmi_injected handling in arch code so its value is not "hidden", in
> >>  >complete_interrupts().
> >>
> >>  Or maybe make raising nmi_injected not decrement nmi_pending.  So:
> >>
> >>    nmi_pending: total number of interrupts in queue
> >>    nmi_injected: of these, how many are currently being injected
> >>
> >>  yes?
> >
> >Yes, at the expense of decrementing on subarch code (which is fine,
> >apparently).
> >
> 
> Hm, we have no place to decrement. 

Decrement when setting nmi_injected = false, increment when setting
nmi_injected = true, in vmx/svm.c.

>  We need to do that when IRET
> executes, but we don't want to request an NMI window exit in the
> common case of nmi_pending = 1.

Do not enable nmi window if nmi_injected = true?


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-19 15:22                 ` Marcelo Tosatti
@ 2011-09-19 15:37                   ` Avi Kivity
  2011-09-19 15:57                     ` Marcelo Tosatti
  0 siblings, 1 reply; 14+ messages in thread
From: Avi Kivity @ 2011-09-19 15:37 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Jan Kiszka, kvm@vger.kernel.org

On 09/19/2011 06:22 PM, Marcelo Tosatti wrote:
> On Mon, Sep 19, 2011 at 06:09:39PM +0300, Avi Kivity wrote:
> >  On 09/19/2011 05:54 PM, Marcelo Tosatti wrote:
> >  >On Mon, Sep 19, 2011 at 05:30:27PM +0300, Avi Kivity wrote:
> >  >>   On 09/19/2011 04:54 PM, Marcelo Tosatti wrote:
> >  >>   >>    >>
> >  >>   >>    >>     Yes, due to NMI-blocked-by-STI.  A really touchy area.
> >  >>   >>    >And we don't need the window exit notification then? I don't understand
> >  >>   >>    >what nmi_in_progress is supposed to do here.
> >  >>   >>
> >  >>   >>    We need the window notification in both cases.  If we're recovering
> >  >>   >>    from STI, then we don't need to collapse NMIs.  If we're completing
> >  >>   >>    an NMI handler, then we do need to collapse NMIs (since the queue
> >  >>   >>    length is two, and we just completed one).
> >  >>   >
> >  >>   >I don't understand what is the point with nmi_in_progress, and the above
> >  >>   >hunk, either. Can't inject_nmi do:
> >  >>   >
> >  >>   >if (nmi_injected + atomic_read(nmi_pending)<    2)
> >  >>   >       atomic_inc(nmi_pending)
> >  >>   >
> >  >>   >Instead of collapsing somewhere else?
> >  >>
> >  >>   We could.  It's not atomic though - two threads executing in
> >  >>   parallel could raise the value to three.  Could do a cmpxchg loop
> >  >>   does an increment bounded to two.  I guess this is a lot clearer,
> >  >>   thanks.
> >  >>
> >  >>   >You'd also have to change
> >  >>   >nmi_injected handling in arch code so its value is not "hidden", in
> >  >>   >complete_interrupts().
> >  >>
> >  >>   Or maybe make raising nmi_injected not decrement nmi_pending.  So:
> >  >>
> >  >>     nmi_pending: total number of interrupts in queue
> >  >>     nmi_injected: of these, how many are currently being injected
> >  >>
> >  >>   yes?
> >  >
> >  >Yes, at the expense of decrementing on subarch code (which is fine,
> >  >apparently).
> >  >
> >
> >  Hm, we have no place to decrement.
>
> Decrement when setting nmi_injected = false, increment when setting
> nmi_injected = true, in vmx/svm.c.

That gives a queue length of 3: one running nmi and nmi_pending = 2.

> >   We need to do that when IRET
> >  executes, but we don't want to request an NMI window exit in the
> >  common case of nmi_pending = 1.
>
> Do not enable nmi window if nmi_injected = true?
>

We have to, since we need a back-to-back nmi if the queue length > 1 
(including the running nmi).

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-19 15:37                   ` Avi Kivity
@ 2011-09-19 15:57                     ` Marcelo Tosatti
  2011-09-20  8:40                       ` Avi Kivity
  0 siblings, 1 reply; 14+ messages in thread
From: Marcelo Tosatti @ 2011-09-19 15:57 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jan Kiszka, kvm@vger.kernel.org

On Mon, Sep 19, 2011 at 06:37:35PM +0300, Avi Kivity wrote:
> On 09/19/2011 06:22 PM, Marcelo Tosatti wrote:
> >On Mon, Sep 19, 2011 at 06:09:39PM +0300, Avi Kivity wrote:
> >>  On 09/19/2011 05:54 PM, Marcelo Tosatti wrote:
> >>  >On Mon, Sep 19, 2011 at 05:30:27PM +0300, Avi Kivity wrote:
> >>  >>   On 09/19/2011 04:54 PM, Marcelo Tosatti wrote:
> >>  >>   >>    >>
> >>  >>   >>    >>     Yes, due to NMI-blocked-by-STI.  A really touchy area.
> >>  >>   >>    >And we don't need the window exit notification then? I don't understand
> >>  >>   >>    >what nmi_in_progress is supposed to do here.
> >>  >>   >>
> >>  >>   >>    We need the window notification in both cases.  If we're recovering
> >>  >>   >>    from STI, then we don't need to collapse NMIs.  If we're completing
> >>  >>   >>    an NMI handler, then we do need to collapse NMIs (since the queue
> >>  >>   >>    length is two, and we just completed one).
> >>  >>   >
> >>  >>   >I don't understand what is the point with nmi_in_progress, and the above
> >>  >>   >hunk, either. Can't inject_nmi do:
> >>  >>   >
> >>  >>   >if (nmi_injected + atomic_read(nmi_pending)<    2)
> >>  >>   >       atomic_inc(nmi_pending)
> >>  >>   >
> >>  >>   >Instead of collapsing somewhere else?
> >>  >>
> >>  >>   We could.  It's not atomic though - two threads executing in
> >>  >>   parallel could raise the value to three.  Could do a cmpxchg loop
> >>  >>   does an increment bounded to two.  I guess this is a lot clearer,
> >>  >>   thanks.
> >>  >>
> >>  >>   >You'd also have to change
> >>  >>   >nmi_injected handling in arch code so its value is not "hidden", in
> >>  >>   >complete_interrupts().
> >>  >>
> >>  >>   Or maybe make raising nmi_injected not decrement nmi_pending.  So:
> >>  >>
> >>  >>     nmi_pending: total number of interrupts in queue
> >>  >>     nmi_injected: of these, how many are currently being injected
> >>  >>
> >>  >>   yes?
> >>  >
> >>  >Yes, at the expense of decrementing on subarch code (which is fine,
> >>  >apparently).
> >>  >
> >>
> >>  Hm, we have no place to decrement.
> >
> >Decrement when setting nmi_injected = false, increment when setting
> >nmi_injected = true, in vmx/svm.c.
> 
> That gives a queue length of 3: one running nmi and nmi_pending = 2.

Increment through the same wrapper that will collapse the second and next, also
used by kvm_inject_nmi.

> >>   We need to do that when IRET
> >>  executes, but we don't want to request an NMI window exit in the
> >>  common case of nmi_pending = 1.
> >
> >Do not enable nmi window if nmi_injected = true?
> >
> 
> We have to, since we need a back-to-back nmi if the queue length > 1
> (including the running nmi).
> 
> -- 
> error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC] KVM: Fix simultaneous NMIs
  2011-09-19 15:57                     ` Marcelo Tosatti
@ 2011-09-20  8:40                       ` Avi Kivity
  0 siblings, 0 replies; 14+ messages in thread
From: Avi Kivity @ 2011-09-20  8:40 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: Jan Kiszka, kvm@vger.kernel.org

On 09/19/2011 06:57 PM, Marcelo Tosatti wrote:
> >  >Decrement when setting nmi_injected = false, increment when setting
> >  >nmi_injected = true, in vmx/svm.c.
> >
> >  That gives a queue length of 3: one running nmi and nmi_pending = 2.
>
> Increment through the same wrapper that will collapse the second and next, also
> used by kvm_inject_nmi.
>
>

Cannot be done outside vcpu context.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2011-09-20  8:40 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-15 14:45 [RFC] KVM: Fix simultaneous NMIs Avi Kivity
2011-09-15 16:01 ` Jan Kiszka
2011-09-15 17:02   ` Avi Kivity
2011-09-15 17:25     ` Jan Kiszka
2011-09-15 17:48       ` Avi Kivity
2011-09-19 13:54         ` Marcelo Tosatti
2011-09-19 14:30           ` Avi Kivity
2011-09-19 14:54             ` Marcelo Tosatti
2011-09-19 15:09               ` Avi Kivity
2011-09-19 15:12                 ` Avi Kivity
2011-09-19 15:22                 ` Marcelo Tosatti
2011-09-19 15:37                   ` Avi Kivity
2011-09-19 15:57                     ` Marcelo Tosatti
2011-09-20  8:40                       ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).