public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Zhi Wang <zhi.wang.linux@gmail.com>
To: Kechen Lu <kechenl@nvidia.com>
Cc: <kvm@vger.kernel.org>, <seanjc@google.com>, <pbonzini@redhat.com>,
	<chao.gao@intel.com>, <shaoqin.huang@intel.com>,
	<vkuznets@redhat.com>, <linux-kernel@vger.kernel.org>
Subject: Re: [RFC PATCH v6 2/6] KVM: x86: Move *_in_guest power management flags to vCPU scope
Date: Thu, 2 Feb 2023 16:56:43 +0200	[thread overview]
Message-ID: <20230202165643.00003a3b@gmail.com> (raw)
In-Reply-To: <20230121020738.2973-3-kechenl@nvidia.com>

On Sat, 21 Jan 2023 02:07:34 +0000
Kechen Lu <kechenl@nvidia.com> wrote:

> Make the runtime disabled mwait/hlt/pause/cstate exits flags vCPU scope
> to allow finer-grained, per-vCPU control.  The VM-scoped control is only
> allowed before vCPUs are created, thus preserving the existing behavior
> is a simple matter of snapshotting the flags at vCPU creation.
> 
> Signed-off-by: Kechen Lu <kechenl@nvidia.com>
> Suggested-by: Sean Christopherson <seanjc@google.com>
> Reviewed-by: Sean Christopherson <seanjc@google.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  5 +++++
>  arch/x86/kvm/cpuid.c            |  4 ++--
>  arch/x86/kvm/lapic.c            |  7 +++----
>  arch/x86/kvm/svm/nested.c       |  4 ++--
>  arch/x86/kvm/svm/svm.c          | 12 ++++++------
>  arch/x86/kvm/vmx/vmx.c          | 16 ++++++++--------
>  arch/x86/kvm/x86.c              |  6 +++++-
>  arch/x86/kvm/x86.h              | 16 ++++++++--------
>  8 files changed, 39 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 6aaae18f1854..41b998234a04 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1009,6 +1009,11 @@ struct kvm_vcpu_arch {
>  #if IS_ENABLED(CONFIG_HYPERV)
>  	hpa_t hv_root_tdp;
>  #endif
> +
> +	bool mwait_in_guest;
> +	bool hlt_in_guest;
> +	bool pause_in_guest;
> +	bool cstate_in_guest;

It would be better to add some comments here. Having xxx_in_guest sit next to
XXX_DISABLE_EXIT_XXX can be quite confusing. Alternatively, align the naming,
e.g. bool disable_exit_mwait <-> XXX_DISABLE_EXIT_XXX.

>  };
>  
>  struct kvm_lpage_info {
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 596061c1610e..20e427dc608c 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -283,8 +283,8 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e
>  		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
>  
>  	best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent);
> -	if (kvm_hlt_in_guest(vcpu->kvm) && best &&
> -		(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
> +	if (kvm_hlt_in_guest(vcpu) &&
> +	    best && (best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
>  		best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
>  
>  	if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
> diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
> index 4efdb4a4d72c..f0f49d0c6e69 100644
> --- a/arch/x86/kvm/lapic.c
> +++ b/arch/x86/kvm/lapic.c
> @@ -151,14 +151,13 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
>  static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
>  {
>  	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
> -		(kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
> +		(kvm_mwait_in_guest(vcpu) || kvm_hlt_in_guest(vcpu));
>  }
>  
>  bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
>  {
> -	return kvm_x86_ops.set_hv_timer
> -	       && !(kvm_mwait_in_guest(vcpu->kvm) ||
> -		    kvm_can_post_timer_interrupt(vcpu));
> +	return kvm_x86_ops.set_hv_timer &&
> +		!(kvm_mwait_in_guest(vcpu) || kvm_can_post_timer_interrupt(vcpu));
>  }
>  
>  static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index add65dd59756..ed26b6de3007 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -721,7 +721,7 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm,
>  
>  	pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
>  	pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
> -	if (kvm_pause_in_guest(svm->vcpu.kvm)) {
> +	if (kvm_pause_in_guest(&svm->vcpu)) {
>  		/* use guest values since host doesn't intercept PAUSE */
>  		vmcb02->control.pause_filter_count = pause_count12;
>  		vmcb02->control.pause_filter_thresh = pause_thresh12;
> @@ -1012,7 +1012,7 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
>  	vmcb12->control.event_inj         = svm->nested.ctl.event_inj;
>  	vmcb12->control.event_inj_err     = svm->nested.ctl.event_inj_err;
>  
> -	if (!kvm_pause_in_guest(vcpu->kvm)) {
> +	if (!kvm_pause_in_guest(vcpu)) {
>  		vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
>  		vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
>  
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index 9a194aa1a75a..dc7176605e01 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1014,7 +1014,7 @@ static void grow_ple_window(struct kvm_vcpu *vcpu)
>  	struct vmcb_control_area *control = &svm->vmcb->control;
>  	int old = control->pause_filter_count;
>  
> -	if (kvm_pause_in_guest(vcpu->kvm))
> +	if (kvm_pause_in_guest(vcpu))
>  		return;
>  
>  	control->pause_filter_count = __grow_ple_window(old,
> @@ -1035,7 +1035,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
>  	struct vmcb_control_area *control = &svm->vmcb->control;
>  	int old = control->pause_filter_count;
>  
> -	if (kvm_pause_in_guest(vcpu->kvm))
> +	if (kvm_pause_in_guest(vcpu))
>  		return;
>  
>  	control->pause_filter_count =
> @@ -1229,12 +1229,12 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
>  	svm_set_intercept(svm, INTERCEPT_RDPRU);
>  	svm_set_intercept(svm, INTERCEPT_RSM);
>  
> -	if (!kvm_mwait_in_guest(vcpu->kvm)) {
> +	if (!kvm_mwait_in_guest(vcpu)) {
>  		svm_set_intercept(svm, INTERCEPT_MONITOR);
>  		svm_set_intercept(svm, INTERCEPT_MWAIT);
>  	}
>  
> -	if (!kvm_hlt_in_guest(vcpu->kvm))
> +	if (!kvm_hlt_in_guest(vcpu))
>  		svm_set_intercept(svm, INTERCEPT_HLT);
>  
>  	control->iopm_base_pa = __sme_set(iopm_base);
> @@ -1278,7 +1278,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
>  	svm->nested.vmcb12_gpa = INVALID_GPA;
>  	svm->nested.last_vmcb12_gpa = INVALID_GPA;
>  
> -	if (!kvm_pause_in_guest(vcpu->kvm)) {
> +	if (!kvm_pause_in_guest(vcpu)) {
>  		control->pause_filter_count = pause_filter_count;
>  		if (pause_filter_thresh)
>  			control->pause_filter_thresh = pause_filter_thresh;
> @@ -4362,7 +4362,7 @@ static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
>  
>  static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
>  {
> -	if (!kvm_pause_in_guest(vcpu->kvm))
> +	if (!kvm_pause_in_guest(vcpu))
>  		shrink_ple_window(vcpu);
>  }
>  
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index fc9008dbed33..019a20029878 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -1689,7 +1689,7 @@ static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
>  	 * then the instruction is already executing and RIP has already been
>  	 * advanced.
>  	 */
> -	if (kvm_hlt_in_guest(vcpu->kvm) &&
> +	if (kvm_hlt_in_guest(vcpu) &&
>  			vmcs_read32(GUEST_ACTIVITY_STATE) == GUEST_ACTIVITY_HLT)
>  		vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
>  }
> @@ -4412,10 +4412,10 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
>  		exec_control &= ~(CPU_BASED_CR3_LOAD_EXITING |
>  				  CPU_BASED_CR3_STORE_EXITING |
>  				  CPU_BASED_INVLPG_EXITING);
> -	if (kvm_mwait_in_guest(vmx->vcpu.kvm))
> +	if (kvm_mwait_in_guest(&vmx->vcpu))
>  		exec_control &= ~(CPU_BASED_MWAIT_EXITING |
>  				CPU_BASED_MONITOR_EXITING);
> -	if (kvm_hlt_in_guest(vmx->vcpu.kvm))
> +	if (kvm_hlt_in_guest(&vmx->vcpu))
>  		exec_control &= ~CPU_BASED_HLT_EXITING;
>  	return exec_control;
>  }
> @@ -4515,7 +4515,7 @@ static u32 vmx_secondary_exec_control(struct vcpu_vmx *vmx)
>  	}
>  	if (!enable_unrestricted_guest)
>  		exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
> -	if (kvm_pause_in_guest(vmx->vcpu.kvm))
> +	if (kvm_pause_in_guest(&vmx->vcpu))
>  		exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
>  	if (!kvm_vcpu_apicv_active(vcpu))
>  		exec_control &= ~(SECONDARY_EXEC_APIC_REGISTER_VIRT |
> @@ -4661,7 +4661,7 @@ static void init_vmcs(struct vcpu_vmx *vmx)
>  		vmcs_write16(LAST_PID_POINTER_INDEX, kvm->arch.max_vcpu_ids - 1);
>  	}
>  
> -	if (!kvm_pause_in_guest(kvm)) {
> +	if (!kvm_pause_in_guest(&vmx->vcpu)) {
>  		vmcs_write32(PLE_GAP, ple_gap);
>  		vmx->ple_window = ple_window;
>  		vmx->ple_window_dirty = true;
> @@ -5833,7 +5833,7 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu)
>   */
>  static int handle_pause(struct kvm_vcpu *vcpu)
>  {
> -	if (!kvm_pause_in_guest(vcpu->kvm))
> +	if (!kvm_pause_in_guest(vcpu))
>  		grow_ple_window(vcpu);
>  
>  	/*
> @@ -7379,7 +7379,7 @@ static int vmx_vcpu_create(struct kvm_vcpu *vcpu)
>  	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
>  	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
>  	vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
> -	if (kvm_cstate_in_guest(vcpu->kvm)) {
> +	if (kvm_cstate_in_guest(vcpu)) {
>  		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
>  		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
>  		vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
> @@ -7935,7 +7935,7 @@ static void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu)
>  
>  static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
>  {
> -	if (!kvm_pause_in_guest(vcpu->kvm))
> +	if (!kvm_pause_in_guest(vcpu))
>  		shrink_ple_window(vcpu);
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c8ae9c4f9f08..9a77b55142c6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -11634,6 +11634,10 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
>  #if IS_ENABLED(CONFIG_HYPERV)
>  	vcpu->arch.hv_root_tdp = INVALID_PAGE;
>  #endif
> +	vcpu->arch.mwait_in_guest = vcpu->kvm->arch.mwait_in_guest;
> +	vcpu->arch.hlt_in_guest = vcpu->kvm->arch.hlt_in_guest;
> +	vcpu->arch.pause_in_guest = vcpu->kvm->arch.pause_in_guest;
> +	vcpu->arch.cstate_in_guest = vcpu->kvm->arch.cstate_in_guest;
>  
>  	r = static_call(kvm_x86_vcpu_create)(vcpu);
>  	if (r)
> @@ -12885,7 +12889,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
>  		     kvm_is_exception_pending(vcpu)))
>  		return false;
>  
> -	if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
> +	if (kvm_hlt_in_guest(vcpu) && !kvm_can_deliver_async_pf(vcpu))
>  		return false;
>  
>  	/*
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index 9de72586f406..b8e49a9d353d 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -351,24 +351,24 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
>  	    __rem;						\
>  	 })
>  
> -static inline bool kvm_mwait_in_guest(struct kvm *kvm)
> +static inline bool kvm_mwait_in_guest(struct kvm_vcpu *vcpu)
>  {
> -	return kvm->arch.mwait_in_guest;
> +	return vcpu->arch.mwait_in_guest;
>  }
>  
> -static inline bool kvm_hlt_in_guest(struct kvm *kvm)
> +static inline bool kvm_hlt_in_guest(struct kvm_vcpu *vcpu)
>  {
> -	return kvm->arch.hlt_in_guest;
> +	return vcpu->arch.hlt_in_guest;
>  }
>  
> -static inline bool kvm_pause_in_guest(struct kvm *kvm)
> +static inline bool kvm_pause_in_guest(struct kvm_vcpu *vcpu)
>  {
> -	return kvm->arch.pause_in_guest;
> +	return vcpu->arch.pause_in_guest;
>  }
>  
> -static inline bool kvm_cstate_in_guest(struct kvm *kvm)
> +static inline bool kvm_cstate_in_guest(struct kvm_vcpu *vcpu)
>  {
> -	return kvm->arch.cstate_in_guest;
> +	return vcpu->arch.cstate_in_guest;
>  }
>  
>  static inline bool kvm_notify_vmexit_enabled(struct kvm *kvm)


  reply	other threads:[~2023-02-02 14:57 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-21  2:07 [RFC PATCH v6 0/6] KVM: x86: add per-vCPU exits disable capability Kechen Lu
2023-01-21  2:07 ` [RFC PATCH v6 1/6] KVM: x86: only allow exits disable before vCPUs created Kechen Lu
2023-01-21  7:28   ` Greg KH
2023-01-22  1:48     ` Kechen Lu
2023-01-21  2:07 ` [RFC PATCH v6 2/6] KVM: x86: Move *_in_guest power management flags to vCPU scope Kechen Lu
2023-02-02 14:56   ` Zhi Wang [this message]
2023-02-02 19:42     ` Kechen Lu
2023-01-21  2:07 ` [RFC PATCH v6 3/6] KVM: x86: Reject disabling of MWAIT interception when not allowed Kechen Lu
2023-01-31 12:11   ` Zhao Liu
2023-02-01  0:43     ` Kechen Lu
2023-01-21  2:07 ` [RFC PATCH v6 4/6] KVM: x86: Let userspace re-enable previously disabled exits Kechen Lu
2023-01-30  6:19   ` Chao Gao
2023-01-30 20:25     ` Kechen Lu
2023-01-21  2:07 ` [RFC PATCH v6 5/6] KVM: x86: add vCPU scoped toggling for " Kechen Lu
2023-01-30  6:42   ` Chao Gao
2023-01-30 20:57     ` Kechen Lu
2023-01-31  2:23       ` Chao Gao
2023-01-21  2:07 ` [RFC PATCH v6 6/6] KVM: selftests: Add tests for VM and vCPU cap KVM_CAP_X86_DISABLE_EXITS Kechen Lu
2023-02-02 15:08   ` Zhi Wang
2023-02-02 20:17     ` Kechen Lu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230202165643.00003a3b@gmail.com \
    --to=zhi.wang.linux@gmail.com \
    --cc=chao.gao@intel.com \
    --cc=kechenl@nvidia.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=shaoqin.huang@intel.com \
    --cc=vkuznets@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox