From mboxrd@z Thu Jan  1 00:00:00 1970
From: Paolo Bonzini <pbonzini@redhat.com>
Subject: Re: [PATCH 11/12] KVM: nVMX: Rework interception of IRQs and NMIs
Date: Thu, 16 Jan 2014 16:08:30 +0100
Message-ID: <52D7F5EE.1060203@redhat.com>
References: <cover.1388857646.git.jan.kiszka@web.de> <2f1243e9d3b0abe0b876b81e89de96cce9cecab0.1388857646.git.jan.kiszka@web.de>
Mime-Version: 1.0
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: 7bit
Cc: Gleb Natapov <gleb@kernel.org>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	kvm <kvm@vger.kernel.org>
To: Jan Kiszka <jan.kiszka@web.de>
Return-path: <kvm-owner@vger.kernel.org>
Received: from mail-qa0-f45.google.com ([209.85.216.45]:53401 "EHLO
	mail-qa0-f45.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org
	with ESMTP id S1752256AbaAPPIf (ORCPT <rfc822;kvm@vger.kernel.org>);
	Thu, 16 Jan 2014 10:08:35 -0500
Received: by mail-qa0-f45.google.com with SMTP id ii20so2205779qab.18
        for <kvm@vger.kernel.org>; Thu, 16 Jan 2014 07:08:35 -0800 (PST)
In-Reply-To: <2f1243e9d3b0abe0b876b81e89de96cce9cecab0.1388857646.git.jan.kiszka@web.de>
Sender: kvm-owner@vger.kernel.org
List-ID: <kvm.vger.kernel.org>

Il 04/01/2014 18:47, Jan Kiszka ha scritto:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> Move the check for leaving L2 on pending and intercepted IRQs or NMIs
> from the *_allowed handler into a dedicated callback. Invoke this
> callback at the relevant points before KVM checks if IRQs/NMIs can be
> injected. The callback has the task to switch from L2 to L1 if needed
> and inject the proper vmexit events.
> 
> The rework fixes L2 wakeups from HLT and provides the foundation for
> preemption timer emulation.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  2 ++
>  arch/x86/kvm/vmx.c              | 67 +++++++++++++++++++++++------------------
>  arch/x86/kvm/x86.c              | 15 +++++++--
>  3 files changed, 53 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index e73651b..d195421 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -764,6 +764,8 @@ struct kvm_x86_ops {
>  			       struct x86_instruction_info *info,
>  			       enum x86_intercept_stage stage);
>  	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
> +
> +	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
>  };
>  
>  struct kvm_arch_async_pf {
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 1245ff1..ec8a976 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -4620,22 +4620,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
>  
>  static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>  {
> -	if (is_guest_mode(vcpu)) {
> -		if (to_vmx(vcpu)->nested.nested_run_pending)
> -			return 0;
> -		if (nested_exit_on_nmi(vcpu)) {
> -			nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
> -					  NMI_VECTOR | INTR_TYPE_NMI_INTR |
> -					  INTR_INFO_VALID_MASK, 0);
> -			/*
> -			 * The NMI-triggered VM exit counts as injection:
> -			 * clear this one and block further NMIs.
> -			 */
> -			vcpu->arch.nmi_pending = 0;
> -			vmx_set_nmi_mask(vcpu, true);
> -			return 0;
> -		}
> -	}
> +	if (to_vmx(vcpu)->nested.nested_run_pending)
> +		return 0;
>  
>  	if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
>  		return 0;
> @@ -4647,19 +4633,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
>  
>  static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
>  {
> -	if (is_guest_mode(vcpu)) {
> -		if (to_vmx(vcpu)->nested.nested_run_pending)
> -			return 0;
> -		if (nested_exit_on_intr(vcpu)) {
> -			nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
> -					  0, 0);
> -			/*
> -			 * fall through to normal code, but now in L1, not L2
> -			 */
> -		}
> -	}
> -
> -	return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
> +	return (!to_vmx(vcpu)->nested.nested_run_pending &&
> +		vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
>  		!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
>  			(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS));
>  }
> @@ -8158,6 +8133,35 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
>  	}
>  }
>  
> +static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr)
> +{
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
> +	if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) {
> +		if (vmx->nested.nested_run_pending)
> +			return -EBUSY;
> +		nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
> +				  NMI_VECTOR | INTR_TYPE_NMI_INTR |
> +				  INTR_INFO_VALID_MASK, 0);
> +		/*
> +		 * The NMI-triggered VM exit counts as injection:
> +		 * clear this one and block further NMIs.
> +		 */
> +		vcpu->arch.nmi_pending = 0;
> +		vmx_set_nmi_mask(vcpu, true);
> +		return 0;
> +	}
> +
> +	if ((kvm_cpu_has_interrupt(vcpu) || external_intr) &&
> +	    nested_exit_on_intr(vcpu)) {
> +		if (vmx->nested.nested_run_pending)
> +			return -EBUSY;
> +		nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
>   * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
> @@ -8498,6 +8502,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
>  		nested_vmx_succeed(vcpu);
>  	if (enable_shadow_vmcs)
>  		vmx->nested.sync_shadow_vmcs = true;
> +
> +	/* in case we halted in L2 */
> +	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
>  }
>  
>  /*
> @@ -8637,6 +8644,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
>  
>  	.check_intercept = vmx_check_intercept,
>  	.handle_external_intr = vmx_handle_external_intr,
> +
> +	.check_nested_events = vmx_check_nested_events,
>  };
>  
>  static int __init vmx_init(void)
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 559ae75..8746b7e 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -5846,6 +5846,9 @@ static void inject_pending_event(struct kvm_vcpu *vcpu)
>  		return;
>  	}
>  
> +	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
> +		kvm_x86_ops->check_nested_events(vcpu, false);
> +
>  	/* try to inject new event if pending */
>  	if (vcpu->arch.nmi_pending) {
>  		if (kvm_x86_ops->nmi_allowed(vcpu)) {
> @@ -5966,12 +5969,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>  
>  		inject_pending_event(vcpu);
>  
> +		if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
> +			req_immediate_exit |=
> +				kvm_x86_ops->check_nested_events(vcpu,
> +								 req_int_win);

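Please add "!= 0" to the check_nested_events() result here, as is done for
the enable_nmi_window()/enable_irq_window() calls below.  For now I only
have this cosmetic comment; I may have more questions when I port SVM to
the new framework.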

Thanks,

Paolo

>  		/* enable NMI/IRQ window open exits if needed */
>  		if (vcpu->arch.nmi_pending)
> -			req_immediate_exit =
> +			req_immediate_exit |=
>  				kvm_x86_ops->enable_nmi_window(vcpu) != 0;
>  		else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win)
> -			req_immediate_exit =
> +			req_immediate_exit |=
>  				kvm_x86_ops->enable_irq_window(vcpu) != 0;
>  
>  		if (kvm_lapic_enabled(vcpu)) {
> @@ -7295,6 +7303,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
>  
>  int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
>  {
> +	if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events)
> +		kvm_x86_ops->check_nested_events(vcpu, false);
> +
>  	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
>  		!vcpu->arch.apf.halted)
>  		|| !list_empty_careful(&vcpu->async_pf.done)
>