All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Michael S. Tsirkin" <mst@redhat.com>
To: "Jan H. Schönherr" <jschoenh@amazon.de>
Cc: "Paolo Bonzini" <pbonzini@redhat.com>,
	"Radim Krčmář" <rkrcmar@redhat.com>,
	"Joerg Roedel" <joro@8bytes.org>,
	"KarimAllah Ahmed" <karahmed@amazon.de>,
	kvm@vger.kernel.org
Subject: Re: [PATCH 1/3] KVM: Don't enable MWAIT in guest by default
Date: Mon, 27 Nov 2017 22:46:55 +0200	[thread overview]
Message-ID: <20171127224620-mutt-send-email-mst@kernel.org> (raw)
In-Reply-To: <1511615373-32615-2-git-send-email-jschoenh@amazon.de>

On Sat, Nov 25, 2017 at 02:09:31PM +0100, Jan H. Schönherr wrote:
> Allowing a guest to execute MWAIT without interception enables a guest
> to put a (physical) CPU into a power saving state, where it takes
> longer to return from than what may be desired by the host.
> 
> Don't give a guest that power over a host by default. (Especially,
> since nothing prevents a guest from using MWAIT even when it is not
> advertised via CPUID.)
> 
> This restores the behavior from before Linux 4.12 commit 668fffa3f838
> ("kvm: better MWAIT emulation for guests") but keeps the option to
> enable MWAIT in guest for individual VMs.

As others pointed out, an interrupt will wake up the host CPU anyway.

Given that, what's the actual motivation here?

> Suggested-by: KarimAllah Ahmed <karahmed@amazon.de>
> Signed-off-by: Jan H. Schönherr <jschoenh@amazon.de>
> ---
> Note: AMD code paths are only compile tested
> ---
>  Documentation/virtual/kvm/api.txt | 20 ++++++++++--------
>  arch/x86/include/asm/kvm_host.h   |  2 ++
>  arch/x86/kvm/svm.c                |  2 +-
>  arch/x86/kvm/vmx.c                |  9 ++++----
>  arch/x86/kvm/x86.c                | 44 ++++++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/x86.h                | 35 ++-----------------------------
>  6 files changed, 64 insertions(+), 48 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
> index f670e4b..0ee812c 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -4163,6 +4163,17 @@ enables QEMU to build error log and branch to guest kernel registered
>  machine check handling routine. Without this capability KVM will
>  branch to guests' 0x200 interrupt vector.
>  
> +7.13 KVM_CAP_X86_GUEST_MWAIT
> +
> +Architectures: x86
> +Parameters: none
> +Returns: 0 on success
> +
> +This capability indicates that a guest using memory monitoring instructions
> +(MWAIT/MWAITX) to stop a virtual CPU will not cause a VM exit. As such, time
> +spent while a virtual CPU is halted in this way will then be accounted for as
> +guest running time on the host (as opposed to e.g. HLT).
> +
>  8. Other capabilities.
>  ----------------------
>  
> @@ -4275,15 +4286,6 @@ reserved.
>      Both registers and addresses are 64-bits wide.
>      It will be possible to run 64-bit or 32-bit guest code.
>  
> -8.8 KVM_CAP_X86_GUEST_MWAIT
> -
> -Architectures: x86
> -
> -This capability indicates that guest using memory monotoring instructions
> -(MWAIT/MWAITX) to stop the virtual CPU will not cause a VM exit.  As such time
> -spent while virtual CPU is halted in this way will then be accounted for as
> -guest running time on the host (as opposed to e.g. HLT).
> -
>  8.9 KVM_CAP_ARM_USER_IRQ
>  
>  Architectures: arm, arm64
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index b97726e..f7bcfaa 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -781,6 +781,8 @@ struct kvm_arch {
>  
>  	gpa_t wall_clock;
>  
> +	bool mwait_in_guest;
> +
>  	bool ept_identity_pagetable_done;
>  	gpa_t ept_identity_map_addr;
>  
> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
> index 1f3e7f2..ef1b320 100644
> --- a/arch/x86/kvm/svm.c
> +++ b/arch/x86/kvm/svm.c
> @@ -1253,7 +1253,7 @@ static void init_vmcb(struct vcpu_svm *svm)
>  	set_intercept(svm, INTERCEPT_WBINVD);
>  	set_intercept(svm, INTERCEPT_XSETBV);
>  
> -	if (!kvm_mwait_in_guest()) {
> +	if (!kvm_mwait_in_guest(svm->vcpu.kvm)) {
>  		set_intercept(svm, INTERCEPT_MONITOR);
>  		set_intercept(svm, INTERCEPT_MWAIT);
>  	}
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 1eb7053..a067735 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -3635,13 +3635,11 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
>  	      CPU_BASED_USE_IO_BITMAPS |
>  	      CPU_BASED_MOV_DR_EXITING |
>  	      CPU_BASED_USE_TSC_OFFSETING |
> +	      CPU_BASED_MWAIT_EXITING |
> +	      CPU_BASED_MONITOR_EXITING |
>  	      CPU_BASED_INVLPG_EXITING |
>  	      CPU_BASED_RDPMC_EXITING;
>  
> -	if (!kvm_mwait_in_guest())
> -		min |= CPU_BASED_MWAIT_EXITING |
> -			CPU_BASED_MONITOR_EXITING;
> -
>  	opt = CPU_BASED_TPR_SHADOW |
>  	      CPU_BASED_USE_MSR_BITMAPS |
>  	      CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
> @@ -5297,6 +5295,9 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
>  		exec_control |= CPU_BASED_CR3_STORE_EXITING |
>  				CPU_BASED_CR3_LOAD_EXITING  |
>  				CPU_BASED_INVLPG_EXITING;
> +	if (kvm_mwait_in_guest(vmx->vcpu.kvm))
> +		exec_control &= ~(CPU_BASED_MWAIT_EXITING |
> +				  CPU_BASED_MONITOR_EXITING);
>  	return exec_control;
>  }
>  
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 985a305..fe6627a 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -67,6 +67,7 @@
>  #include <asm/pvclock.h>
>  #include <asm/div64.h>
>  #include <asm/irq_remapping.h>
> +#include <asm/mwait.h>
>  
>  #define CREATE_TRACE_POINTS
>  #include "trace.h"
> @@ -2672,6 +2673,40 @@ static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
>  	return r;
>  }
>  
> +static bool kvm_mwait_in_guest_possible(void)
> +{
> +	unsigned int eax, ebx, ecx, edx;
> +
> +	if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
> +		return false;
> +
> +	switch (boot_cpu_data.x86_vendor) {
> +	case X86_VENDOR_AMD:
> +		/* All AMD CPUs have a working MWAIT implementation */
> +		return true;
> +	case X86_VENDOR_INTEL:
> +		/* Handle Intel below */
> +		break;
> +	default:
> +		return false;
> +	}
> +
> +	/*
> +	 * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
> +	 * they would allow guest to stop the CPU completely by disabling
> +	 * interrupts then invoking MWAIT.
> +	 */
> +	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
> +		return false;
> +
> +	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
> +
> +	if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
> +		return false;
> +
> +	return true;
> +}
> +
>  int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  {
>  	int r;
> @@ -2726,7 +2761,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
>  		r = KVM_CLOCK_TSC_STABLE;
>  		break;
>  	case KVM_CAP_X86_GUEST_MWAIT:
> -		r = kvm_mwait_in_guest();
> +		r = kvm_mwait_in_guest_possible();
>  		break;
>  	case KVM_CAP_X86_SMM:
>  		/* SMBASE is usually relocated above 1M on modern chipsets,
> @@ -4026,6 +4061,13 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
>  
>  		r = 0;
>  		break;
> +	case KVM_CAP_X86_GUEST_MWAIT:
> +		r = -EINVAL;
> +		if (kvm_mwait_in_guest_possible()) {
> +			kvm->arch.mwait_in_guest = true;
> +			r = 0;
> +		}
> +		break;
>  	default:
>  		r = -EINVAL;
>  		break;
> diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
> index d0b95b7..ed8e150 100644
> --- a/arch/x86/kvm/x86.h
> +++ b/arch/x86/kvm/x86.h
> @@ -2,8 +2,6 @@
>  #ifndef ARCH_X86_KVM_X86_H
>  #define ARCH_X86_KVM_X86_H
>  
> -#include <asm/processor.h>
> -#include <asm/mwait.h>
>  #include <linux/kvm_host.h>
>  #include <asm/pvclock.h>
>  #include "kvm_cache_regs.h"
> @@ -263,38 +261,9 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
>  	    __rem;						\
>  	 })
>  
> -static inline bool kvm_mwait_in_guest(void)
> +static inline bool kvm_mwait_in_guest(struct kvm *kvm)
>  {
> -	unsigned int eax, ebx, ecx, edx;
> -
> -	if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
> -		return false;
> -
> -	switch (boot_cpu_data.x86_vendor) {
> -	case X86_VENDOR_AMD:
> -		/* All AMD CPUs have a working MWAIT implementation */
> -		return true;
> -	case X86_VENDOR_INTEL:
> -		/* Handle Intel below */
> -		break;
> -	default:
> -		return false;
> -	}
> -
> -	/*
> -	 * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
> -	 * they would allow guest to stop the CPU completely by disabling
> -	 * interrupts then invoking MWAIT.
> -	 */
> -	if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
> -		return false;
> -
> -	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
> -
> -	if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
> -		return false;
> -
> -	return true;
> +	return kvm->arch.mwait_in_guest;
>  }
>  
>  #endif
> -- 
> 2.3.1.dirty

  parent reply	other threads:[~2017-11-27 20:46 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-25 13:09 [PATCH 0/3] KVM: Tie MWAIT/HLT/PAUSE interception to initially disabled capabilities Jan H. Schönherr
2017-11-25 13:09 ` [PATCH 1/3] KVM: Don't enable MWAIT in guest by default Jan H. Schönherr
2017-11-27 18:13   ` Jim Mattson
     [not found]     ` <82e1f7c8-fdd6-835e-319a-bec72d771ef9@redhat.com>
2017-11-27 18:32       ` Jim Mattson
2017-11-28 23:58     ` Jan H. Schönherr
2017-11-29 16:58       ` Radim Krčmář
2017-11-27 20:46   ` Michael S. Tsirkin [this message]
2017-11-27 22:36     ` Jan H. Schönherr
2017-11-28 14:00       ` Michael S. Tsirkin
2017-11-27 20:50   ` Michael S. Tsirkin
     [not found]     ` <90f7f081-95d7-f573-8b57-5c6e86fd2a8d@redhat.com>
2017-11-27 20:57       ` Michael S. Tsirkin
2017-11-25 13:09 ` [PATCH 2/3] KVM: Add capability to not exit on HLT Jan H. Schönherr
2017-11-27  1:32   ` Wanpeng Li
2017-11-27  1:47     ` Wanpeng Li
     [not found]       ` <a2f4cf7f-5d7b-a1cc-30d5-d18df4d49173@redhat.com>
2017-11-27 12:29         ` Jan H. Schönherr
     [not found]     ` <421c71fd-6dff-c01e-9e78-42f114711ea9@redhat.com>
2017-11-27 15:27       ` Jan H. Schönherr
     [not found]   ` <e17ea420-c141-18b6-2622-e33a3f540c61@redhat.com>
2017-11-27 16:12     ` Jan H. Schönherr
2017-11-27 20:45   ` Michael S. Tsirkin
     [not found]     ` <8ce45bad-b43c-4e97-aa69-74d7fc9cecb5@redhat.com>
2017-11-27 20:55       ` Michael S. Tsirkin
2017-11-28  1:34         ` Longpeng (Mike)
2017-11-28 14:04           ` Michael S. Tsirkin
2017-11-25 13:09 ` [PATCH 3/3] KVM: Add capability to not exit on PAUSE Jan H. Schönherr
2017-11-27 20:48   ` Michael S. Tsirkin
2017-11-28  3:37   ` Longpeng (Mike)
2017-11-29  0:09     ` Jan H. Schönherr
2017-11-29  4:34       ` Longpeng (Mike)
2017-11-29 12:20         ` Jan H. Schönherr
     [not found] ` <a3c80a22-ff69-fa51-ea90-48f039eb449a@redhat.com>
2017-11-28  0:15   ` [PATCH 0/3] KVM: Tie MWAIT/HLT/PAUSE interception to initially disabled capabilities Jan H. Schönherr
     [not found]     ` <8971d9e0-388c-9934-1ab2-33508cbbeb8f@redhat.com>
2017-11-28 10:42       ` Jan H. Schönherr
2017-11-28 14:08       ` Michael S. Tsirkin
     [not found]         ` <e61d93f0-17d9-d182-83ae-b7165ae3dcb0@redhat.com>
2017-11-29  0:20           ` Michael S. Tsirkin
2017-11-29  0:24             ` Michael S. Tsirkin
     [not found]             ` <8e559062-e459-5a85-a4a3-72a4baf7764c@redhat.com>
2017-11-29 15:13               ` Michael S. Tsirkin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20171127224620-mutt-send-email-mst@kernel.org \
    --to=mst@redhat.com \
    --cc=joro@8bytes.org \
    --cc=jschoenh@amazon.de \
    --cc=karahmed@amazon.de \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=rkrcmar@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.