[PATCH 2/3 v2] KVM: VMX: VMCLEAR/VMPTRLD usage changes.

Kernel KVM virtualization development
 help / color / mirror / Atom feed

* [PATCH 2/3 v2] KVM: VMX: VMCLEAR/VMPTRLD usage changes.
@ 2010-05-06  8:45 Xu, Dongxiao
  2010-05-06 13:37 ` Avi Kivity
  0 siblings, 1 reply; 3+ messages in thread
From: Xu, Dongxiao @ 2010-05-06  8:45 UTC (permalink / raw)
  To: kvm@vger.kernel.org; +Cc: Avi Kivity, Marcelo Tosatti, Alexander Graf

From: Dongxiao Xu <dongxiao.xu@intel.com>

Originally VMCLEAR/VMPTRLD is called on vcpu migration. To
support hosted VMM coexistance, VMCLEAR is executed on vcpu
schedule out, and VMPTRLD is executed on vcpu schedule in.
This could also eliminate the IPI when doing VMCLEAR.

Signed-off-by: Dongxiao Xu <dongxiao.xu@intel.com>
---
 arch/x86/kvm/vmx.c |   70 +++++++++++++++++++++++++++++++++++++---------------
 1 files changed, 50 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e77da89..aa3b2f9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -62,6 +62,9 @@ module_param_named(unrestricted_guest,
 static int __read_mostly emulate_invalid_guest_state = 0;
 module_param(emulate_invalid_guest_state, bool, S_IRUGO);
 
+static int __read_mostly vmm_coexistence;
+module_param(vmm_coexistence, bool, S_IRUGO);
+
 #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST				\
 	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
 #define KVM_GUEST_CR0_MASK						\
@@ -222,6 +225,7 @@ static u64 host_efer;
 
 static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
 
+static void vmx_flush_tlb(struct kvm_vcpu *vcpu);
 /*
  * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
  * away by decrementing the array size.
@@ -784,25 +788,31 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u64 tsc_this, delta, new_offset;
 
-	if (vcpu->cpu != cpu) {
-		vcpu_clear(vmx);
-		kvm_migrate_timers(vcpu);
+	if (vmm_coexistence) {
+		vmcs_load(vmx->vmcs);
 		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
-		local_irq_disable();
-		list_add(&vmx->local_vcpus_link,
-			 &per_cpu(vcpus_on_cpu, cpu));
-		local_irq_enable();
-	}
+	} else {
+		if (vcpu->cpu != cpu) {
+			vcpu_clear(vmx);
+			set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
+			local_irq_disable();
+			list_add(&vmx->local_vcpus_link,
+					&per_cpu(vcpus_on_cpu, cpu));
+			local_irq_enable();
+		}
 
-	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
-		per_cpu(current_vmcs, cpu) = vmx->vmcs;
-		vmcs_load(vmx->vmcs);
+		if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
+			per_cpu(current_vmcs, cpu) = vmx->vmcs;
+			vmcs_load(vmx->vmcs);
+		}
 	}
 
 	if (vcpu->cpu != cpu) {
 		struct desc_ptr dt;
 		unsigned long sysenter_esp;
 
+		kvm_migrate_timers(vcpu);
+
 		vcpu->cpu = cpu;
 		/*
 		 * Linux uses per-cpu TSS and GDT, so set these when switching
@@ -829,7 +839,14 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+
 	__vmx_load_host_state(to_vmx(vcpu));
+	if (vmm_coexistence) {
+		vmcs_clear(vmx->vmcs);
+		rdtscll(vmx->vcpu.arch.host_tsc);
+		vmx->launched = 0;
+	}
 }
 
 static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
@@ -1280,7 +1297,8 @@ static void kvm_cpu_vmxoff(void)
 
 static void hardware_disable(void *garbage)
 {
-	vmclear_local_vcpus();
+	if (!vmm_coexistence)
+		vmclear_local_vcpus();
 	kvm_cpu_vmxoff();
 	write_cr4(read_cr4() & ~X86_CR4_VMXE);
 }
@@ -3927,7 +3945,8 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	if (vmx->vmcs) {
-		vcpu_clear(vmx);
+		if (!vmm_coexistence)
+			vcpu_clear(vmx);
 		free_vmcs(vmx->vmcs);
 		vmx->vmcs = NULL;
 	}
@@ -3969,13 +3988,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
 	if (!vmx->vmcs)
 		goto free_msrs;
 
-	vmcs_clear(vmx->vmcs);
-
-	cpu = get_cpu();
-	vmx_vcpu_load(&vmx->vcpu, cpu);
-	err = vmx_vcpu_setup(vmx);
-	vmx_vcpu_put(&vmx->vcpu);
-	put_cpu();
+	if (vmm_coexistence) {
+		preempt_disable();
+		vmcs_load(vmx->vmcs);
+		err = vmx_vcpu_setup(vmx);
+		vmcs_clear(vmx->vmcs);
+		preempt_enable();
+	} else {
+		vmcs_clear(vmx->vmcs);
+		cpu = get_cpu();
+		vmx_vcpu_load(&vmx->vcpu, cpu);
+		err = vmx_vcpu_setup(vmx);
+		vmx_vcpu_put(&vmx->vcpu);
+		put_cpu();
+	}
 	if (err)
 		goto free_vmcs;
 	if (vm_need_virtualize_apic_accesses(kvm))
@@ -4116,6 +4142,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 
 	vmx->rdtscp_enabled = false;
 	if (vmx_rdtscp_supported()) {
+		if (vmm_coexistence)
+			vmcs_load(vmx->vmcs);
 		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
 		if (exec_control & SECONDARY_EXEC_RDTSCP) {
 			best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
@@ -4127,6 +4155,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 						exec_control);
 			}
 		}
+		if (vmm_coexistence)
+			vmcs_clear(vmx->vmcs);
 	}
 }
 
-- 
1.6.3

^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH 2/3 v2] KVM: VMX: VMCLEAR/VMPTRLD usage changes.
  2010-05-06  8:45 [PATCH 2/3 v2] KVM: VMX: VMCLEAR/VMPTRLD usage changes Xu, Dongxiao
@ 2010-05-06 13:37 ` Avi Kivity
  2010-05-07  2:32   ` Xu, Dongxiao
  0 siblings, 1 reply; 3+ messages in thread
From: Avi Kivity @ 2010-05-06 13:37 UTC (permalink / raw)
  To: Xu, Dongxiao; +Cc: kvm@vger.kernel.org, Marcelo Tosatti, Alexander Graf

On 05/06/2010 11:45 AM, Xu, Dongxiao wrote:
> From: Dongxiao Xu<dongxiao.xu@intel.com>
>
> Originally VMCLEAR/VMPTRLD is called on vcpu migration. To
> support hosted VMM coexistance, VMCLEAR is executed on vcpu
> schedule out, and VMPTRLD is executed on vcpu schedule in.
> This could also eliminate the IPI when doing VMCLEAR.
>
>
>
> +static int __read_mostly vmm_coexistence;
> +module_param(vmm_coexistence, bool, S_IRUGO);
>    

I think we can call it 'exclusive' defaulting to true.

> +
>   #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST				\
>   	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
>   #define KVM_GUEST_CR0_MASK						\
> @@ -222,6 +225,7 @@ static u64 host_efer;
>
>   static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
>
> +static void vmx_flush_tlb(struct kvm_vcpu *vcpu);
>   /*
>    * Keep MSR_K6_STAR at the end, as setup_msrs() will try to optimize it
>    * away by decrementing the array size.
> @@ -784,25 +788,31 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>   	struct vcpu_vmx *vmx = to_vmx(vcpu);
>   	u64 tsc_this, delta, new_offset;
>
> -	if (vcpu->cpu != cpu) {
> -		vcpu_clear(vmx);
> -		kvm_migrate_timers(vcpu);
> +	if (vmm_coexistence) {
> +		vmcs_load(vmx->vmcs);
>   		set_bit(KVM_REQ_TLB_FLUSH,&vcpu->requests);
> -		local_irq_disable();
> -		list_add(&vmx->local_vcpus_link,
> -			&per_cpu(vcpus_on_cpu, cpu));
> -		local_irq_enable();
> -	}
> +	} else {
> +		if (vcpu->cpu != cpu) {
> +			vcpu_clear(vmx);
> +			set_bit(KVM_REQ_TLB_FLUSH,&vcpu->requests);
> +			local_irq_disable();
> +			list_add(&vmx->local_vcpus_link,
> +					&per_cpu(vcpus_on_cpu, cpu));
> +			local_irq_enable();
> +		}
>
> -	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
> -		per_cpu(current_vmcs, cpu) = vmx->vmcs;
> -		vmcs_load(vmx->vmcs);
> +		if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
> +			per_cpu(current_vmcs, cpu) = vmx->vmcs;
> +			vmcs_load(vmx->vmcs);
> +		}
>   	}
>    

Please keep the exclusive use code as it is, and just add !exclusive 
cases.  For example. the current_vmcs test will always fail since 
current_vmcs will be set to NULL on vmx_vcpu_put, so we can have just 
one vmcs_load().

>
> @@ -829,7 +839,14 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>
>   static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
>   {
> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
> +
>   	__vmx_load_host_state(to_vmx(vcpu));
> +	if (vmm_coexistence) {
> +		vmcs_clear(vmx->vmcs);
> +		rdtscll(vmx->vcpu.arch.host_tsc);
> +		vmx->launched = 0;
>    

This code is also duplicated.  Please refactor to avoid duplication.

> +	}
>   }
>
>   static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
> @@ -1280,7 +1297,8 @@ static void kvm_cpu_vmxoff(void)
>
>   static void hardware_disable(void *garbage)
>   {
> -	vmclear_local_vcpus();
> +	if (!vmm_coexistence)
> +		vmclear_local_vcpus();
>   	kvm_cpu_vmxoff();
>   	write_cr4(read_cr4()&  ~X86_CR4_VMXE);
>   }
> @@ -3927,7 +3945,8 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu)
>   	struct vcpu_vmx *vmx = to_vmx(vcpu);
>
>   	if (vmx->vmcs) {
> -		vcpu_clear(vmx);
> +		if (!vmm_coexistence)
> +			vcpu_clear(vmx);
>    

What's wrong with doing this unconditionally?

>   		free_vmcs(vmx->vmcs);
>   		vmx->vmcs = NULL;
>   	}
> @@ -3969,13 +3988,20 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
>   	if (!vmx->vmcs)
>   		goto free_msrs;
>
> -	vmcs_clear(vmx->vmcs);
> -
> -	cpu = get_cpu();
> -	vmx_vcpu_load(&vmx->vcpu, cpu);
> -	err = vmx_vcpu_setup(vmx);
> -	vmx_vcpu_put(&vmx->vcpu);
> -	put_cpu();
> +	if (vmm_coexistence) {
> +		preempt_disable();
> +		vmcs_load(vmx->vmcs);
> +		err = vmx_vcpu_setup(vmx);
> +		vmcs_clear(vmx->vmcs);
> +		preempt_enable();
>    

Why won't the normal sequence work?

> +	} else {
> +		vmcs_clear(vmx->vmcs);
> +		cpu = get_cpu();
> +		vmx_vcpu_load(&vmx->vcpu, cpu);
> +		err = vmx_vcpu_setup(vmx);
> +		vmx_vcpu_put(&vmx->vcpu);
> +		put_cpu();
> +	}
>   	if (err)
>   		goto free_vmcs;
>   	if (vm_need_virtualize_apic_accesses(kvm))
> @@ -4116,6 +4142,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
>
>   	vmx->rdtscp_enabled = false;
>   	if (vmx_rdtscp_supported()) {
> +		if (vmm_coexistence)
> +			vmcs_load(vmx->vmcs);
>    

Don't understand why this is needed.  Shouldn't vmx_vcpu_load() load the 
vmptr?

>   		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
>   		if (exec_control&  SECONDARY_EXEC_RDTSCP) {
>   			best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
> @@ -4127,6 +4155,8 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
>   						exec_control);
>   			}
>   		}
> +		if (vmm_coexistence)
> +			vmcs_clear(vmx->vmcs);
>   	}
>   }
>
>    


-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 3+ messages in thread

* RE: [PATCH 2/3 v2] KVM: VMX: VMCLEAR/VMPTRLD usage changes.
  2010-05-06 13:37 ` Avi Kivity
@ 2010-05-07  2:32   ` Xu, Dongxiao
  0 siblings, 0 replies; 3+ messages in thread
From: Xu, Dongxiao @ 2010-05-07  2:32 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm@vger.kernel.org, Marcelo Tosatti, Alexander Graf

Avi Kivity wrote:
> On 05/06/2010 11:45 AM, Xu, Dongxiao wrote:
>> From: Dongxiao Xu<dongxiao.xu@intel.com>
>> 
>> Originally VMCLEAR/VMPTRLD is called on vcpu migration. To
>> support hosted VMM coexistance, VMCLEAR is executed on vcpu
>> schedule out, and VMPTRLD is executed on vcpu schedule in.
>> This could also eliminate the IPI when doing VMCLEAR.
>> 
>> 
>> 
>> +static int __read_mostly vmm_coexistence;
>> +module_param(vmm_coexistence, bool, S_IRUGO);
>> 
> 
> I think we can call it 'exclusive' defaulting to true.

I will change it in next version.

> 
>> +
>>   #define KVM_GUEST_CR0_MASK_UNRESTRICTED_GUEST				\
>>   	(X86_CR0_WP | X86_CR0_NE | X86_CR0_NW | X86_CR0_CD)
>>   #define KVM_GUEST_CR0_MASK						\
>> @@ -222,6 +225,7 @@ static u64 host_efer;
>> 
>>   static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
>> 
>> +static void vmx_flush_tlb(struct kvm_vcpu *vcpu);
>>   /*
>>    * Keep MSR_K6_STAR at the end, as setup_msrs() will try to
>> optimize it 
>>    * away by decrementing the array size.
>> @@ -784,25 +788,31 @@ static void vmx_vcpu_load(struct kvm_vcpu
>>   	*vcpu, int cpu) struct vcpu_vmx *vmx = to_vmx(vcpu);
>>   	u64 tsc_this, delta, new_offset;
>> 
>> -	if (vcpu->cpu != cpu) {
>> -		vcpu_clear(vmx);
>> -		kvm_migrate_timers(vcpu);
>> +	if (vmm_coexistence) {
>> +		vmcs_load(vmx->vmcs);
>>   		set_bit(KVM_REQ_TLB_FLUSH,&vcpu->requests);
>> -		local_irq_disable();
>> -		list_add(&vmx->local_vcpus_link,
>> -			&per_cpu(vcpus_on_cpu, cpu));
>> -		local_irq_enable();
>> -	}
>> +	} else {
>> +		if (vcpu->cpu != cpu) {
>> +			vcpu_clear(vmx);
>> +			set_bit(KVM_REQ_TLB_FLUSH,&vcpu->requests);
>> +			local_irq_disable();
>> +			list_add(&vmx->local_vcpus_link,
>> +					&per_cpu(vcpus_on_cpu, cpu));
>> +			local_irq_enable();
>> +		}
>> 
>> -	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
>> -		per_cpu(current_vmcs, cpu) = vmx->vmcs;
>> -		vmcs_load(vmx->vmcs);
>> +		if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
>> +			per_cpu(current_vmcs, cpu) = vmx->vmcs;
>> +			vmcs_load(vmx->vmcs);
>> +		}
>>   	}
>> 
> 
> Please keep the exclusive use code as it is, and just add !exclusive
> cases.  For example. the current_vmcs test will always fail since
> current_vmcs will be set to NULL on vmx_vcpu_put, so we can have just
> one vmcs_load().

Thanks for the suggestion.

> 
>> 
>> @@ -829,7 +839,14 @@ static void vmx_vcpu_load(struct kvm_vcpu
>> *vcpu, int cpu) 
>> 
>>   static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
>>   {
>> +	struct vcpu_vmx *vmx = to_vmx(vcpu);
>> +
>>   	__vmx_load_host_state(to_vmx(vcpu));
>> +	if (vmm_coexistence) {
>> +		vmcs_clear(vmx->vmcs);
>> +		rdtscll(vmx->vcpu.arch.host_tsc);
>> +		vmx->launched = 0;
>> 
> 
> This code is also duplicated.  Please refactor to avoid duplication.

Thanks.

> 
>> +	}
>>   }
>> 
>>   static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
>> @@ -1280,7 +1297,8 @@ static void kvm_cpu_vmxoff(void)
>> 
>>   static void hardware_disable(void *garbage)
>>   {
>> -	vmclear_local_vcpus();
>> +	if (!vmm_coexistence)
>> +		vmclear_local_vcpus();
>>   	kvm_cpu_vmxoff();
>>   	write_cr4(read_cr4()&  ~X86_CR4_VMXE);
>>   }
>> @@ -3927,7 +3945,8 @@ static void vmx_free_vmcs(struct kvm_vcpu
>>   	*vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu);
>> 
>>   	if (vmx->vmcs) {
>> -		vcpu_clear(vmx);
>> +		if (!vmm_coexistence)
>> +			vcpu_clear(vmx);
>> 
> 
> What's wrong with doing this unconditionally?

The change should have beed put in PATCH 3/3.
This judgement is to avoid calling vcpu_clear() after kvm_cpu_vmxoff();
However it will not appear in next version since I will set vcpu->cpu=-1
in vmx_vcpu_put() !vmm_exclusive case, so that vcpu_clear() will not
executed actually.

> 
>>   		free_vmcs(vmx->vmcs);
>>   		vmx->vmcs = NULL;
>>   	}
>> @@ -3969,13 +3988,20 @@ static struct kvm_vcpu
>>   		*vmx_create_vcpu(struct kvm *kvm, unsigned int id)   	if
>> (!vmx->vmcs) goto free_msrs; 
>> 
>> -	vmcs_clear(vmx->vmcs);
>> -
>> -	cpu = get_cpu();
>> -	vmx_vcpu_load(&vmx->vcpu, cpu);
>> -	err = vmx_vcpu_setup(vmx);
>> -	vmx_vcpu_put(&vmx->vcpu);
>> -	put_cpu();
>> +	if (vmm_coexistence) {
>> +		preempt_disable();
>> +		vmcs_load(vmx->vmcs);
>> +		err = vmx_vcpu_setup(vmx);
>> +		vmcs_clear(vmx->vmcs);
>> +		preempt_enable();
>> 
> 
> Why won't the normal sequence work?

Yes you are right.

> 
>> +	} else {
>> +		vmcs_clear(vmx->vmcs);
>> +		cpu = get_cpu();
>> +		vmx_vcpu_load(&vmx->vcpu, cpu);
>> +		err = vmx_vcpu_setup(vmx);
>> +		vmx_vcpu_put(&vmx->vcpu);
>> +		put_cpu();
>> +	}
>>   	if (err)
>>   		goto free_vmcs;
>>   	if (vm_need_virtualize_apic_accesses(kvm))
>> @@ -4116,6 +4142,8 @@ static void vmx_cpuid_update(struct kvm_vcpu
>> *vcpu) 
>> 
>>   	vmx->rdtscp_enabled = false;
>>   	if (vmx_rdtscp_supported()) {
>> +		if (vmm_coexistence)
>> +			vmcs_load(vmx->vmcs);
>> 
> 
> Don't understand why this is needed.  Shouldn't vmx_vcpu_load() load
> the vmptr?

This change also should be put in PATCH 3/3, to avoid operating VMCS when vmx is off.
I will change it in next version.

Thanks,
Dongxiao

> 
>>   		exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
>>   		if (exec_control&  SECONDARY_EXEC_RDTSCP) {
>>   			best = kvm_find_cpuid_entry(vcpu, 0x80000001, 0);
>> @@ -4127,6 +4155,8 @@ static void vmx_cpuid_update(struct kvm_vcpu
>>   			*vcpu)   						exec_control); }
>>   		}
>> +		if (vmm_coexistence)
>> +			vmcs_clear(vmx->vmcs);
>>   	}
>>   }


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2010-05-07  2:33 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-05-06  8:45 [PATCH 2/3 v2] KVM: VMX: VMCLEAR/VMPTRLD usage changes Xu, Dongxiao
2010-05-06 13:37 ` Avi Kivity
2010-05-07  2:32   ` Xu, Dongxiao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox