Re: [RFC] KVM: Fix simultaneous NMIs

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Jan Kiszka <jan.kiszka@siemens.com>
To: Avi Kivity <avi@redhat.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>,
	"kvm@vger.kernel.org" <kvm@vger.kernel.org>
Subject: Re: [RFC] KVM: Fix simultaneous NMIs
Date: Thu, 15 Sep 2011 18:01:04 +0200	[thread overview]
Message-ID: <4E722140.4070702@siemens.com> (raw)
In-Reply-To: <1316097911-16424-1-git-send-email-avi@redhat.com>

On 2011-09-15 16:45, Avi Kivity wrote:
> If simultaneous NMIs happen, we're supposed to queue the second
> and next (collapsing them), but currently we sometimes collapse
> the second into the first.

Can you describe the race in a few more details here ("sometimes" sounds
like "I don't know when" :) )?

> 
> Fix by using a counter for pending NMIs instead of a bool; collapsing
> happens when the NMI window reopens.
> 
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
> 
> Not sure whether this interacts correctly with NMI-masked-by-STI or with
> save/restore.
> 
>  arch/x86/include/asm/kvm_host.h |    2 +-
>  arch/x86/kvm/svm.c              |    1 +
>  arch/x86/kvm/vmx.c              |    3 ++-
>  arch/x86/kvm/x86.c              |   33 +++++++++++++++------------------
>  arch/x86/kvm/x86.h              |    7 +++++++
>  5 files changed, 26 insertions(+), 20 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6ab4241..3a95885 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -413,7 +413,7 @@ struct kvm_vcpu_arch {
>  	u32  tsc_catchup_mult;
>  	s8   tsc_catchup_shift;
>  
> -	bool nmi_pending;
> +	atomic_t nmi_pending;
>  	bool nmi_injected;
>  
>  	struct mtrr_state_type mtrr_state;
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index e7ed4b1..d4c792f 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -3609,6 +3609,7 @@ static void svm_complete_interrupts(struct vcpu_svm *svm)
>  	if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
>  	    && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
>  		svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
> +		kvm_collapse_pending_nmis(&svm->vcpu);
>  		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
>  	}
>  
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index a0d6bd9..745dadb 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4761,6 +4761,7 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu)
>  	cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
>  	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
>  	++vcpu->stat.nmi_window_exits;
> +	kvm_collapse_pending_nmis(vcpu);
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
>  
>  	return 1;
> @@ -5790,7 +5791,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
>  		if (vmx_interrupt_allowed(vcpu)) {
>  			vmx->soft_vnmi_blocked = 0;
>  		} else if (vmx->vnmi_blocked_time > 1000000000LL &&
> -			   vcpu->arch.nmi_pending) {
> +			   atomic_read(&vcpu->arch.nmi_pending)) {
>  			/*
>  			 * This CPU don't support us in finding the end of an
>  			 * NMI-blocked window if the guest runs with IRQs
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 6b37f18..d4f45e0 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -359,8 +359,8 @@ void kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
>  
>  void kvm_inject_nmi(struct kvm_vcpu *vcpu)
>  {
> +	atomic_inc(&vcpu->arch.nmi_pending);
>  	kvm_make_request(KVM_REQ_EVENT, vcpu);
> -	vcpu->arch.nmi_pending = 1;

Does the reordering matter? Do we need barriers?

>  }
>  EXPORT_SYMBOL_GPL(kvm_inject_nmi);
>  
> @@ -2844,7 +2844,7 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
>  			KVM_X86_SHADOW_INT_MOV_SS | KVM_X86_SHADOW_INT_STI);
>  
>  	events->nmi.injected = vcpu->arch.nmi_injected;
> -	events->nmi.pending = vcpu->arch.nmi_pending;
> +	events->nmi.pending = atomic_read(&vcpu->arch.nmi_pending) != 0;
>  	events->nmi.masked = kvm_x86_ops->get_nmi_mask(vcpu);
>  	events->nmi.pad = 0;
>  
> @@ -2878,7 +2878,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
>  
>  	vcpu->arch.nmi_injected = events->nmi.injected;
>  	if (events->flags & KVM_VCPUEVENT_VALID_NMI_PENDING)
> -		vcpu->arch.nmi_pending = events->nmi.pending;
> +		atomic_set(&vcpu->arch.nmi_pending, events->nmi.pending);
>  	kvm_x86_ops->set_nmi_mask(vcpu, events->nmi.masked);
>  
>  	if (events->flags & KVM_VCPUEVENT_VALID_SIPI_VECTOR)
> @@ -4763,7 +4763,7 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
>  	kvm_set_rflags(vcpu, ctxt->eflags);
>  
>  	if (irq == NMI_VECTOR)
> -		vcpu->arch.nmi_pending = false;
> +		atomic_set(&vcpu->arch.nmi_pending, 0);
>  	else
>  		vcpu->arch.interrupt.pending = false;
>  
> @@ -5570,9 +5570,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
>  	}
>  
>  	/* try to inject new event if pending */
> -	if (vcpu->arch.nmi_pending) {
> +	if (atomic_read(&vcpu->arch.nmi_pending)) {
>  		if (kvm_x86_ops->nmi_allowed(vcpu)) {
> -			vcpu->arch.nmi_pending = false;
> +			atomic_dec(&vcpu->arch.nmi_pending);

Here we lost NMIs in the past by overwriting nmi_pending while another
one was already queued, right?

>  			vcpu->arch.nmi_injected = true;
>  			kvm_x86_ops->set_nmi(vcpu);
>  		}
> @@ -5604,10 +5604,14 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu)
>  	}
>  }
>  
> +static bool nmi_in_progress(struct kvm_vcpu *vcpu)
> +{
> +	return vcpu->arch.nmi_injected || kvm_x86_ops->get_nmi_mask(vcpu);
> +}
> +
>  static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  {
>  	int r;
> -	bool nmi_pending;
>  	bool req_int_win = !irqchip_in_kernel(vcpu->kvm) &&
>  		vcpu->run->request_interrupt_window;
>  
> @@ -5654,19 +5658,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  	if (unlikely(r))
>  		goto out;
>  
> -	/*
> -	 * An NMI can be injected between local nmi_pending read and
> -	 * vcpu->arch.nmi_pending read inside inject_pending_event().
> -	 * But in that case, KVM_REQ_EVENT will be set, which makes
> -	 * the race described above benign.
> -	 */
> -	nmi_pending = ACCESS_ONCE(vcpu->arch.nmi_pending);
> -
>  	if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) {
>  		inject_pending_event(vcpu);
>  
>  		/* enable NMI/IRQ window open exits if needed */
> -		if (nmi_pending)
> +		if (atomic_read(&vcpu->arch.nmi_pending)
> +		    && nmi_in_progress(vcpu))

Is nmi_pending && !nmi_in_progress possible at all? Is it rather a BUG
condition? If not, what will happen next?

>  			kvm_x86_ops->enable_nmi_window(vcpu);
>  		else if (kvm_cpu_has_interrupt(vcpu) || req_int_win)
>  			kvm_x86_ops->enable_irq_window(vcpu);
> @@ -6374,7 +6371,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
>  
>  int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
>  {
> -	vcpu->arch.nmi_pending = false;
> +	atomic_set(&vcpu->arch.nmi_pending, 0);
>  	vcpu->arch.nmi_injected = false;
>  
>  	vcpu->arch.switch_db_regs = 0;
> @@ -6649,7 +6646,7 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
>  		!vcpu->arch.apf.halted)
>  		|| !list_empty_careful(&vcpu->async_pf.done)
>  		|| vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
> -		|| vcpu->arch.nmi_pending ||
> +		|| atomic_read(&vcpu->arch.nmi_pending) ||
>  		(kvm_arch_interrupt_allowed(vcpu) &&
>  		 kvm_cpu_has_interrupt(vcpu));
>  }
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index d36fe23..ed04e2b 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -125,4 +125,11 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
>  	gva_t addr, void *val, unsigned int bytes,
>  	struct x86_exception *exception);
>  
> +static inline void kvm_collapse_pending_nmis(struct kvm_vcpu *vcpu)
> +{
> +	/* Collapse all NMIs queued while an NMI handler was running to one */
> +	if (atomic_read(&vcpu->arch.nmi_pending))
> +		atomic_set(&vcpu->arch.nmi_pending, 1);

Is it OK that NMIs injected after the collapse will increment this to >
1 again? Or is that impossible?

> +}
> +
>  #endif

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

next prev parent reply	other threads:[~2011-09-15 16:01 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-09-15 14:45 [RFC] KVM: Fix simultaneous NMIs Avi Kivity
2011-09-15 16:01 ` Jan Kiszka [this message]
2011-09-15 17:02   ` Avi Kivity
2011-09-15 17:25     ` Jan Kiszka
2011-09-15 17:48       ` Avi Kivity
2011-09-19 13:54         ` Marcelo Tosatti
2011-09-19 14:30           ` Avi Kivity
2011-09-19 14:54             ` Marcelo Tosatti
2011-09-19 15:09               ` Avi Kivity
2011-09-19 15:12                 ` Avi Kivity
2011-09-19 15:22                 ` Marcelo Tosatti
2011-09-19 15:37                   ` Avi Kivity
2011-09-19 15:57                     ` Marcelo Tosatti
2011-09-20  8:40                       ` Avi Kivity

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4E722140.4070702@siemens.com \
    --to=jan.kiszka@siemens.com \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.