* [PATCH 0/2] KVM: x86: more xsave and mpx improvements
@ 2014-02-25 17:49 Paolo Bonzini
  2014-02-25 17:49 ` [PATCH 1/2] KVM: x86: introduce kvm_supported_xcr0() Paolo Bonzini
  2014-02-25 17:49 ` [PATCH 2/2] KVM: x86: Add nested virtualization support for MPX Paolo Bonzini
  0 siblings, 2 replies; 7+ messages in thread
From: Paolo Bonzini @ 2014-02-25 17:49 UTC (permalink / raw)
To: linux-kernel; +Cc: gleb, mtosatti, jan.kiszka, Liu Jinsong

Here are the patches I mentioned while reviewing Liu Jinsong's MPX
series.  Patch 1 is a further cleanup of xcr0 handling, and patch 2
introduces nested virtualization support for MPX.

Please review.

Thanks,

Paolo

Paolo Bonzini (2):
  KVM: x86: introduce kvm_supported_xcr0()
  KVM: x86: Add nested virtualization support for MPX

 arch/x86/kvm/cpuid.c | 27 ++++++++++++++++-----------
 arch/x86/kvm/vmx.c   | 16 ++++++++++++++++
 arch/x86/kvm/x86.c   |  4 +---
 arch/x86/kvm/x86.h   |  2 ++
 4 files changed, 35 insertions(+), 14 deletions(-)

-- 
1.8.3.1

^ permalink raw reply	[flat|nested] 7+ messages in thread
* [PATCH 1/2] KVM: x86: introduce kvm_supported_xcr0()
  2014-02-25 17:49 [PATCH 0/2] KVM: x86: more xsave and mpx improvements Paolo Bonzini
@ 2014-02-25 17:49 ` Paolo Bonzini
  2014-02-25 17:49 ` [PATCH 2/2] KVM: x86: Add nested virtualization support for MPX Paolo Bonzini
  1 sibling, 0 replies; 7+ messages in thread
From: Paolo Bonzini @ 2014-02-25 17:49 UTC (permalink / raw)
To: linux-kernel; +Cc: gleb, mtosatti, jan.kiszka, Liu Jinsong

XSAVE support for KVM is already using host_xcr0 & KVM_SUPPORTED_XCR0
as a "dynamic" version of KVM_SUPPORTED_XCR0.  However, this is not
enough because the MPX bits should not be presented to the guest unless
kvm_x86_ops confirms the support.  So, replace all instances of
host_xcr0 & KVM_SUPPORTED_XCR0 with a new function kvm_supported_xcr0()
that also has this check.

Note that here:

	if (xstate_bv & ~KVM_SUPPORTED_XCR0)
		return -EINVAL;
	if (xstate_bv & ~host_xcr0)
		return -EINVAL;

the code is equivalent to

	if ((xstate_bv & ~KVM_SUPPORTED_XCR0) ||
	    (xstate_bv & ~host_xcr0))
		return -EINVAL;

i.e. "xstate_bv & (~KVM_SUPPORTED_XCR0 | ~host_xcr0)", which is in turn
equal to "xstate_bv & ~(KVM_SUPPORTED_XCR0 & host_xcr0)".  So we should
also use the new function there.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c | 27 ++++++++++++++++-----------
 arch/x86/kvm/x86.c   |  4 +---
 arch/x86/kvm/x86.h   |  2 ++
 3 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ddc8a7e165df..18aefb4d0927 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -43,6 +43,16 @@ static u32 xstate_required_size(u64 xstate_bv)
 	return ret;
 }
 
+u64 kvm_supported_xcr0(void)
+{
+	u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0;
+
+	if (!kvm_x86_ops->mpx_supported || !kvm_x86_ops->mpx_supported())
+		xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR);
+
+	return xcr0;
+}
+
 void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
@@ -73,7 +83,7 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu)
 	} else {
 		vcpu->arch.guest_supported_xcr0 =
 			(best->eax | ((u64)best->edx << 32)) &
-			host_xcr0 & KVM_SUPPORTED_XCR0;
+			kvm_supported_xcr0();
 		vcpu->arch.guest_xstate_size = best->ebx =
 			xstate_required_size(vcpu->arch.xcr0);
 	}
@@ -210,13 +220,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	entry->flags = 0;
 }
 
-static bool supported_xcr0_bit(unsigned bit)
-{
-	u64 mask = ((u64)1 << bit);
-
-	return mask & KVM_SUPPORTED_XCR0 & host_xcr0;
-}
-
 #define F(x) bit(X86_FEATURE_##x)
 
 static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
@@ -439,16 +442,18 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	}
 	case 0xd: {
 		int idx, i;
+		u64 supported = kvm_supported_xcr0();
 
-		entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0;
-		entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32;
+		entry->eax &= supported;
+		entry->edx &= supported >> 32;
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 		for (idx = 1, i = 1; idx < 64; ++idx) {
+			u64 mask = ((u64)1 << idx);
 			if (*nent >= maxnent)
 				goto out;
 
 			do_cpuid_1_ent(&entry[i], function, idx);
-			if (entry[i].eax == 0 || !supported_xcr0_bit(idx))
+			if (entry[i].eax == 0 || !(supported & mask))
 				continue;
 			entry[i].flags |=
 				KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6530019116b0..4e038cda45a2 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3048,9 +3048,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu,
 	 * CPUID leaf 0xD, index 0, EDX:EAX.  This is for compatibility
 	 * with old userspace.
 	 */
-	if (xstate_bv & ~KVM_SUPPORTED_XCR0)
-		return -EINVAL;
-	if (xstate_bv & ~host_xcr0)
+	if (xstate_bv & ~kvm_supported_xcr0())
 		return -EINVAL;
 	memcpy(&vcpu->arch.guest_fpu.state->xsave, guest_xsave->region,
 		vcpu->arch.guest_xstate_size);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 392ecbff0030..8c97bac9a895 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -126,6 +126,8 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 				| XSTATE_BNDREGS | XSTATE_BNDCSR)
 extern u64 host_xcr0;
 
+extern u64 kvm_supported_xcr0(void);
+
 extern unsigned int min_timer_period_us;
 
 extern struct static_key kvm_no_apic_vcpu;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread
* [PATCH 2/2] KVM: x86: Add nested virtualization support for MPX
  2014-02-25 17:49 [PATCH 0/2] KVM: x86: more xsave and mpx improvements Paolo Bonzini
  2014-02-25 17:49 ` [PATCH 1/2] KVM: x86: introduce kvm_supported_xcr0() Paolo Bonzini
@ 2014-02-25 17:49 ` Paolo Bonzini
  2014-02-25 18:05   ` Jan Kiszka
  1 sibling, 1 reply; 7+ messages in thread
From: Paolo Bonzini @ 2014-02-25 17:49 UTC (permalink / raw)
To: linux-kernel; +Cc: gleb, mtosatti, jan.kiszka, Liu Jinsong

This is simple to do: the "host" BNDCFGS is either 0 or the guest value.
However, both controls have to be present.  We cannot provide MPX if
we only have one of the "load BNDCFGS" or "clear BNDCFGS" controls.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 729b1e42aded..da28ac46ca88 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -202,6 +202,7 @@ struct __packed vmcs12 {
 	u64 guest_pdptr1;
 	u64 guest_pdptr2;
 	u64 guest_pdptr3;
+	u64 guest_bndcfgs;
 	u64 host_ia32_pat;
 	u64 host_ia32_efer;
 	u64 host_ia32_perf_global_ctrl;
@@ -534,6 +535,7 @@ static const unsigned long shadow_read_write_fields[] = {
 	GUEST_CS_LIMIT,
 	GUEST_CS_BASE,
 	GUEST_ES_BASE,
+	GUEST_BNDCFGS,
 	CR0_GUEST_HOST_MASK,
 	CR0_READ_SHADOW,
 	CR4_READ_SHADOW,
@@ -589,6 +591,7 @@ static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD64(GUEST_PDPTR1, guest_pdptr1),
 	FIELD64(GUEST_PDPTR2, guest_pdptr2),
 	FIELD64(GUEST_PDPTR3, guest_pdptr3),
+	FIELD64(GUEST_BNDCFGS, guest_bndcfgs),
 	FIELD64(HOST_IA32_PAT, host_ia32_pat),
 	FIELD64(HOST_IA32_EFER, host_ia32_efer),
 	FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl),
@@ -719,6 +722,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
+static bool vmx_mpx_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
@@ -2279,6 +2283,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	}
 	nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
 				      VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER);
+	if (vmx_mpx_supported())
+		nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS;
 
 	/* entry controls */
 	rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
@@ -2292,6 +2298,8 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 		VM_ENTRY_LOAD_IA32_PAT;
 	nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR |
 				       VM_ENTRY_LOAD_IA32_EFER);
+	if (vmx_mpx_supported())
+		nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS;
 
 	/* cpu-based controls */
 	rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
@@ -7847,6 +7855,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 	set_cr4_guest_host_mask(vmx);
 
+	if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)
+		vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
+
 	if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING)
 		vmcs_write64(TSC_OFFSET,
 			vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset);
@@ -8277,6 +8288,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS);
 	vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP);
 	vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
+	vmcs12->guest_bndcfgs = vmcs_readl(GUEST_BNDCFGS);
 
 	/* update exit information fields: */
 
@@ -8386,6 +8398,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
 	vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
 	vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
 
+	/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1.  */
+	if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
+		vmcs_write64(GUEST_BNDCFGS, 0);
+
 	if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
 		vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
 		vcpu->arch.pat = vmcs12->host_ia32_pat;
-- 
1.8.3.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] KVM: x86: Add nested virtualization support for MPX 2014-02-25 17:49 ` [PATCH 2/2] KVM: x86: Add nested virtualization support for MPX Paolo Bonzini @ 2014-02-25 18:05 ` Jan Kiszka 2014-02-25 18:13 ` Paolo Bonzini 0 siblings, 1 reply; 7+ messages in thread From: Jan Kiszka @ 2014-02-25 18:05 UTC (permalink / raw) To: Paolo Bonzini, linux-kernel; +Cc: gleb, mtosatti, Liu Jinsong On 2014-02-25 18:49, Paolo Bonzini wrote: > This is simple to do, the "host" BNDCFGS is either 0 or the guest value. > However, both controls have to be present. We cannot provide MPX if > we only have one of the "load BNDCFGS" or "clear BNDCFGS" controls. > > Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> > --- > arch/x86/kvm/vmx.c | 16 ++++++++++++++++ > 1 file changed, 16 insertions(+) > > diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c > index 729b1e42aded..da28ac46ca88 100644 > --- a/arch/x86/kvm/vmx.c > +++ b/arch/x86/kvm/vmx.c > @@ -202,6 +202,7 @@ struct __packed vmcs12 { > u64 guest_pdptr1; > u64 guest_pdptr2; > u64 guest_pdptr3; > + u64 guest_bndcfgs; > u64 host_ia32_pat; > u64 host_ia32_efer; > u64 host_ia32_perf_global_ctrl; > @@ -534,6 +535,7 @@ static const unsigned long shadow_read_write_fields[] = { > GUEST_CS_LIMIT, > GUEST_CS_BASE, > GUEST_ES_BASE, > + GUEST_BNDCFGS, > CR0_GUEST_HOST_MASK, > CR0_READ_SHADOW, > CR4_READ_SHADOW, > @@ -589,6 +591,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { > FIELD64(GUEST_PDPTR1, guest_pdptr1), > FIELD64(GUEST_PDPTR2, guest_pdptr2), > FIELD64(GUEST_PDPTR3, guest_pdptr3), > + FIELD64(GUEST_BNDCFGS, guest_bndcfgs), > FIELD64(HOST_IA32_PAT, host_ia32_pat), > FIELD64(HOST_IA32_EFER, host_ia32_efer), > FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), > @@ -719,6 +722,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); > static u64 construct_eptp(unsigned long root_hpa); > static void kvm_cpu_vmxon(u64 addr); > static void kvm_cpu_vmxoff(void); > +static bool 
vmx_mpx_supported(void); > static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); > static void vmx_set_segment(struct kvm_vcpu *vcpu, > struct kvm_segment *var, int seg); > @@ -2279,6 +2283,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) > } > nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | > VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER); > + if (vmx_mpx_supported()) > + nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; > > /* entry controls */ > rdmsr(MSR_IA32_VMX_ENTRY_CTLS, > @@ -2292,6 +2298,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) > VM_ENTRY_LOAD_IA32_PAT; > nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | > VM_ENTRY_LOAD_IA32_EFER); > + if (vmx_mpx_supported()) > + nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; > > /* cpu-based controls */ > rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, > @@ -7847,6 +7855,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) > > set_cr4_guest_host_mask(vmx); > > + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) > + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); > + > if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) > vmcs_write64(TSC_OFFSET, > vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); > @@ -8277,6 +8288,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, > vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); > vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); > vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); > + vmcs12->guest_bndcfgs = vmcs_readl(GUEST_BNDCFGS); Can we read this value unconditionally, even when the host does not support the feature? 
> > /* update exit information fields: */ > > @@ -8386,6 +8398,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, > vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); > vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); > > + /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */ > + if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) > + vmcs_write64(GUEST_BNDCFGS, 0); > + > if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { > vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); > vcpu->arch.pat = vmcs12->host_ia32_pat; > Do we also have a unit test to stress this? Or are we lacking silicon with MPX and corresponding VMX features? Jan -- Siemens AG, Corporate Technology, CT RTC ITP SES-DE Corporate Competence Center Embedded Linux ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] KVM: x86: Add nested virtualization support for MPX 2014-02-25 18:05 ` Jan Kiszka @ 2014-02-25 18:13 ` Paolo Bonzini 2014-02-25 18:46 ` Paolo Bonzini 2014-02-25 18:49 ` Jan Kiszka 0 siblings, 2 replies; 7+ messages in thread From: Paolo Bonzini @ 2014-02-25 18:13 UTC (permalink / raw) To: Jan Kiszka, linux-kernel; +Cc: gleb, mtosatti, Liu Jinsong Il 25/02/2014 19:05, Jan Kiszka ha scritto: > On 2014-02-25 18:49, Paolo Bonzini wrote: >> This is simple to do, the "host" BNDCFGS is either 0 or the guest value. >> However, both controls have to be present. We cannot provide MPX if >> we only have one of the "load BNDCFGS" or "clear BNDCFGS" controls. >> >> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >> --- >> arch/x86/kvm/vmx.c | 16 ++++++++++++++++ >> 1 file changed, 16 insertions(+) >> >> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >> index 729b1e42aded..da28ac46ca88 100644 >> --- a/arch/x86/kvm/vmx.c >> +++ b/arch/x86/kvm/vmx.c >> @@ -202,6 +202,7 @@ struct __packed vmcs12 { >> u64 guest_pdptr1; >> u64 guest_pdptr2; >> u64 guest_pdptr3; >> + u64 guest_bndcfgs; >> u64 host_ia32_pat; >> u64 host_ia32_efer; >> u64 host_ia32_perf_global_ctrl; >> @@ -534,6 +535,7 @@ static const unsigned long shadow_read_write_fields[] = { >> GUEST_CS_LIMIT, >> GUEST_CS_BASE, >> GUEST_ES_BASE, >> + GUEST_BNDCFGS, >> CR0_GUEST_HOST_MASK, >> CR0_READ_SHADOW, >> CR4_READ_SHADOW, >> @@ -589,6 +591,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { >> FIELD64(GUEST_PDPTR1, guest_pdptr1), >> FIELD64(GUEST_PDPTR2, guest_pdptr2), >> FIELD64(GUEST_PDPTR3, guest_pdptr3), >> + FIELD64(GUEST_BNDCFGS, guest_bndcfgs), >> FIELD64(HOST_IA32_PAT, host_ia32_pat), >> FIELD64(HOST_IA32_EFER, host_ia32_efer), >> FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), >> @@ -719,6 +722,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); >> static u64 construct_eptp(unsigned long root_hpa); >> static void kvm_cpu_vmxon(u64 addr); >> 
static void kvm_cpu_vmxoff(void); >> +static bool vmx_mpx_supported(void); >> static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); >> static void vmx_set_segment(struct kvm_vcpu *vcpu, >> struct kvm_segment *var, int seg); >> @@ -2279,6 +2283,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) >> } >> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >> VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER); >> + if (vmx_mpx_supported()) >> + nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; >> >> /* entry controls */ >> rdmsr(MSR_IA32_VMX_ENTRY_CTLS, >> @@ -2292,6 +2298,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) >> VM_ENTRY_LOAD_IA32_PAT; >> nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | >> VM_ENTRY_LOAD_IA32_EFER); >> + if (vmx_mpx_supported()) >> + nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; >> >> /* cpu-based controls */ >> rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, >> @@ -7847,6 +7855,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) >> >> set_cr4_guest_host_mask(vmx); >> >> + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) >> + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); >> + >> if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) >> vmcs_write64(TSC_OFFSET, >> vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); >> @@ -8277,6 +8288,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, >> vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); >> vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); >> vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); >> + vmcs12->guest_bndcfgs = vmcs_readl(GUEST_BNDCFGS); > > Can we read this value unconditionally, even when the host does not > support the feature? 
return -EWRONGPATCH; >> >> /* update exit information fields: */ >> >> @@ -8386,6 +8398,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, >> vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); >> vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); >> >> + /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */ >> + if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) >> + vmcs_write64(GUEST_BNDCFGS, 0); >> + >> if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { >> vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); >> vcpu->arch.pat = vmcs12->host_ia32_pat; >> > > Do we also have a unit test to stress this? Or are we lacking silicon > with MPX and corresponding VMX features? No silicon yet. There is an emulator, but it is already slow enough without nested virtualization... it would be three-level virtualization :) Paolo ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] KVM: x86: Add nested virtualization support for MPX 2014-02-25 18:13 ` Paolo Bonzini @ 2014-02-25 18:46 ` Paolo Bonzini 2014-02-25 18:49 ` Jan Kiszka 1 sibling, 0 replies; 7+ messages in thread From: Paolo Bonzini @ 2014-02-25 18:46 UTC (permalink / raw) To: Jan Kiszka, linux-kernel; +Cc: gleb, mtosatti, Liu Jinsong Il 25/02/2014 19:13, Paolo Bonzini ha scritto: > Il 25/02/2014 19:05, Jan Kiszka ha scritto: >> On 2014-02-25 18:49, Paolo Bonzini wrote: >>> This is simple to do, the "host" BNDCFGS is either 0 or the guest value. >>> However, both controls have to be present. We cannot provide MPX if >>> we only have one of the "load BNDCFGS" or "clear BNDCFGS" controls. >>> >>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >>> --- >>> arch/x86/kvm/vmx.c | 16 ++++++++++++++++ >>> 1 file changed, 16 insertions(+) >>> >>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >>> index 729b1e42aded..da28ac46ca88 100644 >>> --- a/arch/x86/kvm/vmx.c >>> +++ b/arch/x86/kvm/vmx.c >>> @@ -202,6 +202,7 @@ struct __packed vmcs12 { >>> u64 guest_pdptr1; >>> u64 guest_pdptr2; >>> u64 guest_pdptr3; >>> + u64 guest_bndcfgs; >>> u64 host_ia32_pat; >>> u64 host_ia32_efer; >>> u64 host_ia32_perf_global_ctrl; >>> @@ -534,6 +535,7 @@ static const unsigned long >>> shadow_read_write_fields[] = { >>> GUEST_CS_LIMIT, >>> GUEST_CS_BASE, >>> GUEST_ES_BASE, >>> + GUEST_BNDCFGS, >>> CR0_GUEST_HOST_MASK, >>> CR0_READ_SHADOW, >>> CR4_READ_SHADOW, >>> @@ -589,6 +591,7 @@ static const unsigned short >>> vmcs_field_to_offset_table[] = { >>> FIELD64(GUEST_PDPTR1, guest_pdptr1), >>> FIELD64(GUEST_PDPTR2, guest_pdptr2), >>> FIELD64(GUEST_PDPTR3, guest_pdptr3), >>> + FIELD64(GUEST_BNDCFGS, guest_bndcfgs), >>> FIELD64(HOST_IA32_PAT, host_ia32_pat), >>> FIELD64(HOST_IA32_EFER, host_ia32_efer), >>> FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), >>> @@ -719,6 +722,7 @@ static unsigned long nested_ept_get_cr3(struct >>> kvm_vcpu *vcpu); >>> static u64 
construct_eptp(unsigned long root_hpa); >>> static void kvm_cpu_vmxon(u64 addr); >>> static void kvm_cpu_vmxoff(void); >>> +static bool vmx_mpx_supported(void); >>> static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); >>> static void vmx_set_segment(struct kvm_vcpu *vcpu, >>> struct kvm_segment *var, int seg); >>> @@ -2279,6 +2283,8 @@ static __init void >>> nested_vmx_setup_ctls_msrs(void) >>> } >>> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >>> VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER); >>> + if (vmx_mpx_supported()) >>> + nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; >>> >>> /* entry controls */ >>> rdmsr(MSR_IA32_VMX_ENTRY_CTLS, >>> @@ -2292,6 +2298,8 @@ static __init void >>> nested_vmx_setup_ctls_msrs(void) >>> VM_ENTRY_LOAD_IA32_PAT; >>> nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | >>> VM_ENTRY_LOAD_IA32_EFER); >>> + if (vmx_mpx_supported()) >>> + nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; >>> >>> /* cpu-based controls */ >>> rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, >>> @@ -7847,6 +7855,9 @@ static void prepare_vmcs02(struct kvm_vcpu >>> *vcpu, struct vmcs12 *vmcs12) >>> >>> set_cr4_guest_host_mask(vmx); >>> >>> + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) >>> + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); >>> + >>> if (vmcs12->cpu_based_vm_exec_control & >>> CPU_BASED_USE_TSC_OFFSETING) >>> vmcs_write64(TSC_OFFSET, >>> vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); >>> @@ -8277,6 +8288,7 @@ static void prepare_vmcs12(struct kvm_vcpu >>> *vcpu, struct vmcs12 *vmcs12, >>> vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); >>> vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); >>> vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); >>> + vmcs12->guest_bndcfgs = vmcs_readl(GUEST_BNDCFGS); >> >> Can we read this value unconditionally, even when the host does not >> support the feature? 
> > return -EWRONGPATCH; Makes sense to clarify since I'll only be able to send the right patch tomorrow. I had not noticed this problem because vmcs_readl just returns a random value if it fails and GUEST_BNDCFGS will not be vmcs_written. x86/vmx.flat passes even with this bug. However, this should be a vmcs_read64. Paolo >>> >>> /* update exit information fields: */ >>> >>> @@ -8386,6 +8398,10 @@ static void load_vmcs12_host_state(struct >>> kvm_vcpu *vcpu, >>> vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); >>> vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); >>> >>> + /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */ >>> + if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) >>> + vmcs_write64(GUEST_BNDCFGS, 0); >>> + >>> if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { >>> vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); >>> vcpu->arch.pat = vmcs12->host_ia32_pat; >>> >> >> Do we also have a unit test to stress this? Or are we lacking silicon >> with MPX and corresponding VMX features? > > No silicon yet. > > There is an emulator, but it is already slow enough without nested > virtualization... it would be three-level virtualization :) > > Paolo > ^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] KVM: x86: Add nested virtualization support for MPX 2014-02-25 18:13 ` Paolo Bonzini 2014-02-25 18:46 ` Paolo Bonzini @ 2014-02-25 18:49 ` Jan Kiszka 1 sibling, 0 replies; 7+ messages in thread From: Jan Kiszka @ 2014-02-25 18:49 UTC (permalink / raw) To: Paolo Bonzini, linux-kernel; +Cc: gleb, mtosatti, Liu Jinsong On 2014-02-25 19:13, Paolo Bonzini wrote: > Il 25/02/2014 19:05, Jan Kiszka ha scritto: >> On 2014-02-25 18:49, Paolo Bonzini wrote: >>> This is simple to do, the "host" BNDCFGS is either 0 or the guest value. >>> However, both controls have to be present. We cannot provide MPX if >>> we only have one of the "load BNDCFGS" or "clear BNDCFGS" controls. >>> >>> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> >>> --- >>> arch/x86/kvm/vmx.c | 16 ++++++++++++++++ >>> 1 file changed, 16 insertions(+) >>> >>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c >>> index 729b1e42aded..da28ac46ca88 100644 >>> --- a/arch/x86/kvm/vmx.c >>> +++ b/arch/x86/kvm/vmx.c >>> @@ -202,6 +202,7 @@ struct __packed vmcs12 { >>> u64 guest_pdptr1; >>> u64 guest_pdptr2; >>> u64 guest_pdptr3; >>> + u64 guest_bndcfgs; >>> u64 host_ia32_pat; >>> u64 host_ia32_efer; >>> u64 host_ia32_perf_global_ctrl; >>> @@ -534,6 +535,7 @@ static const unsigned long >>> shadow_read_write_fields[] = { >>> GUEST_CS_LIMIT, >>> GUEST_CS_BASE, >>> GUEST_ES_BASE, >>> + GUEST_BNDCFGS, >>> CR0_GUEST_HOST_MASK, >>> CR0_READ_SHADOW, >>> CR4_READ_SHADOW, >>> @@ -589,6 +591,7 @@ static const unsigned short >>> vmcs_field_to_offset_table[] = { >>> FIELD64(GUEST_PDPTR1, guest_pdptr1), >>> FIELD64(GUEST_PDPTR2, guest_pdptr2), >>> FIELD64(GUEST_PDPTR3, guest_pdptr3), >>> + FIELD64(GUEST_BNDCFGS, guest_bndcfgs), >>> FIELD64(HOST_IA32_PAT, host_ia32_pat), >>> FIELD64(HOST_IA32_EFER, host_ia32_efer), >>> FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), >>> @@ -719,6 +722,7 @@ static unsigned long nested_ept_get_cr3(struct >>> kvm_vcpu *vcpu); >>> static u64 
construct_eptp(unsigned long root_hpa); >>> static void kvm_cpu_vmxon(u64 addr); >>> static void kvm_cpu_vmxoff(void); >>> +static bool vmx_mpx_supported(void); >>> static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); >>> static void vmx_set_segment(struct kvm_vcpu *vcpu, >>> struct kvm_segment *var, int seg); >>> @@ -2279,6 +2283,8 @@ static __init void >>> nested_vmx_setup_ctls_msrs(void) >>> } >>> nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | >>> VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER); >>> + if (vmx_mpx_supported()) >>> + nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; >>> >>> /* entry controls */ >>> rdmsr(MSR_IA32_VMX_ENTRY_CTLS, >>> @@ -2292,6 +2298,8 @@ static __init void >>> nested_vmx_setup_ctls_msrs(void) >>> VM_ENTRY_LOAD_IA32_PAT; >>> nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | >>> VM_ENTRY_LOAD_IA32_EFER); >>> + if (vmx_mpx_supported()) >>> + nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; >>> >>> /* cpu-based controls */ >>> rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, >>> @@ -7847,6 +7855,9 @@ static void prepare_vmcs02(struct kvm_vcpu >>> *vcpu, struct vmcs12 *vmcs12) >>> >>> set_cr4_guest_host_mask(vmx); >>> >>> + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) >>> + vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); >>> + >>> if (vmcs12->cpu_based_vm_exec_control & >>> CPU_BASED_USE_TSC_OFFSETING) >>> vmcs_write64(TSC_OFFSET, >>> vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); >>> @@ -8277,6 +8288,7 @@ static void prepare_vmcs12(struct kvm_vcpu >>> *vcpu, struct vmcs12 *vmcs12, >>> vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); >>> vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); >>> vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); >>> + vmcs12->guest_bndcfgs = vmcs_readl(GUEST_BNDCFGS); >> >> Can we read this value unconditionally, even when the host does not >> support the feature? 
> > return -EWRONGPATCH; > >>> >>> /* update exit information fields: */ >>> >>> @@ -8386,6 +8398,10 @@ static void load_vmcs12_host_state(struct >>> kvm_vcpu *vcpu, >>> vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); >>> vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); >>> >>> + /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */ >>> + if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) >>> + vmcs_write64(GUEST_BNDCFGS, 0); >>> + >>> if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { >>> vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); >>> vcpu->arch.pat = vmcs12->host_ia32_pat; >>> >> >> Do we also have a unit test to stress this? Or are we lacking silicon >> with MPX and corresponding VMX features? > > No silicon yet. > > There is an emulator, but it is already slow enough without nested > virtualization... it would be three-level virtualization :) Cool! :) Maybe it will get faster again when we nest even deeper - no one was there so far to tell us... But even if not, the vmx unit tests are pretty short and should not suffer much. Jan -- Siemens AG, Corporate Technology, CT RTC ITP SES-DE Corporate Competence Center Embedded Linux ^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2014-02-25 18:50 UTC | newest] Thread overview: 7+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2014-02-25 17:49 [PATCH 0/2] KVM: x86: more xsave and mpx improvements Paolo Bonzini 2014-02-25 17:49 ` [PATCH 1/2] KVM: x86: introduce kvm_supported_xcr0() Paolo Bonzini 2014-02-25 17:49 ` [PATCH 2/2] KVM: x86: Add nested virtualization support for MPX Paolo Bonzini 2014-02-25 18:05 ` Jan Kiszka 2014-02-25 18:13 ` Paolo Bonzini 2014-02-25 18:46 ` Paolo Bonzini 2014-02-25 18:49 ` Jan Kiszka