public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2/2] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware
       [not found] ` <20070726144602.4847.64724.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org>
@ 2007-07-26 14:52   ` Gregory Haskins
       [not found]     ` <20070726145210.4847.90637.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Gregory Haskins @ 2007-07-26 14:52 UTC (permalink / raw)
  To: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f
  Cc: ghaskins-Et1tbQHTxzrQT0dZR+AlfA

We need to provide locking around the current_vmcs/VMCS interactions to
protect against race conditions.

Signed-off-by: Gregory Haskins <ghaskins-Et1tbQHTxzrQT0dZR+AlfA@public.gmane.org>
---

 drivers/kvm/vmx.c |   77 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 5f0a7fd..6b697f8 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -188,6 +188,20 @@ static struct kvm_msr_entry *find_msr_entry(struct kvm_vcpu *vcpu, u32 msr)
 	return NULL;
 }
 
+static void vmcs_load(struct vmcs *vmcs)
+{
+	u64 phys_addr = __pa(vmcs);
+	u8 error;
+	
+	asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
+		      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
+		      : "cc");
+
+	if (error)
+		printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
+		       vmcs, phys_addr);
+}
+
 static void vmcs_clear(struct vmcs *vmcs)
 {
 	u64 phys_addr = __pa(vmcs);
@@ -205,11 +219,40 @@ static void __vcpu_clear(void *arg)
 {
 	struct kvm_vcpu *vcpu = arg;
 	int cpu = raw_smp_processor_id();
+	unsigned long flags;
 
-	if (vcpu->cpu == cpu)
+	local_irq_save(flags);
+
+	if (vcpu->cpu != -1) {
+		/*
+		 * We should *never* try to __vcpu_clear a remote VMCS. This
+		 * would have been addressed at a higher layer already
+		 */
+		BUG_ON(vcpu->cpu != cpu);
+
+		/*
+		 * Execute the VMCLEAR operation regardless of whether the 
+		 * VMCS is currently active on this CPU or not (it doesn't
+		 * necessarily have to be)
+		 */
 		vmcs_clear(vmx(vcpu)->vmcs);
-	if (per_cpu(current_vmcs, cpu) == vmx(vcpu)->vmcs)
-		per_cpu(current_vmcs, cpu) = NULL;
+
+		/*
+		 * And finally, if this VMCS *was* currently active on this
+		 * CPU, mark the CPU as available again
+		 */
+		if (per_cpu(current_vmcs, cpu) == vmx(vcpu)->vmcs)
+			per_cpu(current_vmcs, cpu) = NULL;
+	} else
+		/*
+		 * If vcpu->cpu thinks we are not installed anywhere,
+		 * but this CPU thinks we are currently active, something is
+		 * wacked.
+		 */
+		BUG_ON(per_cpu(current_vmcs, cpu) == vmx(vcpu)->vmcs);
+
+	local_irq_restore(flags);
+
 	rdtscll(vcpu->host_tsc);
 }
 
@@ -220,6 +263,7 @@ static void vcpu_clear(struct kvm_vcpu *vcpu)
 	else
 		__vcpu_clear(vcpu);
 	vmx(vcpu)->launched = 0;
+	vcpu->cpu           = -1;
 }
 
 static unsigned long vmcs_readl(unsigned long field)
@@ -423,26 +467,33 @@ static void vmx_load_host_state(struct kvm_vcpu *vcpu)
  */
 static void vmx_vcpu_load(struct kvm_vcpu *vcpu)
 {
-	u64 phys_addr = __pa(vmx(vcpu)->vmcs);
 	int cpu;
 	u64 tsc_this, delta;
+	unsigned long flags;
 
 	cpu = get_cpu();
 
 	if (vcpu->cpu != cpu)
 		vcpu_clear(vcpu);
 
-	if (per_cpu(current_vmcs, cpu) != vmx(vcpu)->vmcs) {
-		u8 error;
+	/*
+	 * By the time we get here, we know that either our VMCS was previously
+	 * loaded on the current CPU, or that it's not loaded on any logical CPU
+	 * in the system at all due to the vcpu_clear() operation above.
+	 * Either way, we must atomically make sure we are the currently
+	 * loaded pointer
+	 */
+	local_irq_save(flags);
+ 	if (per_cpu(current_vmcs, cpu) != vmx(vcpu)->vmcs) {
+		/*
+		 * Re-establish ourselves as the current VMCS in an unlaunched
+		 * state
+		 */
+		vmcs_load(vmx(vcpu)->vmcs);
+ 		per_cpu(current_vmcs, cpu) = vmx(vcpu)->vmcs;
 
-		per_cpu(current_vmcs, cpu) = vmx(vcpu)->vmcs;
-		asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0"
-			      : "=g"(error) : "a"(&phys_addr), "m"(phys_addr)
-			      : "cc");
-		if (error)
-			printk(KERN_ERR "kvm: vmptrld %p/%llx fail\n",
-			       vmx(vcpu)->vmcs, phys_addr);
 	}
+	local_irq_restore(flags);
 
 	if (vcpu->cpu != cpu) {
 		struct descriptor_table dt;


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware
       [not found]     ` <20070726145210.4847.90637.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org>
@ 2007-07-26 15:03       ` Avi Kivity
  0 siblings, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2007-07-26 15:03 UTC (permalink / raw)
  To: Gregory Haskins; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Gregory Haskins wrote:
> We need to provide locking around the current_vmcs/VMCS interactions to
> protect against race conditions.
>
>   

Can you explain the race?

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware
@ 2007-07-26 15:15 Gregory Haskins
       [not found] ` <46A882480200005A00028358-Igcdv/6uVdMHoYOw/+koYqIwWpluYiW7@public.gmane.org>
  0 siblings, 1 reply; 9+ messages in thread
From: Gregory Haskins @ 2007-07-26 15:15 UTC (permalink / raw)
  To: avi-atKUWr5tajBWk0Htik3J/w; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

On Thu, 2007-07-26 at 18:03 +0300, Avi Kivity wrote:
> Gregory Haskins wrote:
> > We need to provide locking around the current_vmcs/VMCS interactions to
> > protect against race conditions.
> >
> >   
> 
> Can you explain the race?

Sure.  It can happen when two VMs are running simultaneously.  Let's call
them VM-a and VM-b.  Assume the scenario: VM-a is on CPU-x, gets
migrated to CPU-y, and VM-b gets scheduled in on CPU-x.  There is a race
on CPU-x with the VMCS handling logic between the VM-b process context,
and the IPI to execute the __vcpu_clear for VM-a. 

Disabling interrupts was chosen as the sync-primitive, because the code
will always be on the CPU in question.



-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware
       [not found] ` <46A882480200005A00028358-Igcdv/6uVdMHoYOw/+koYqIwWpluYiW7@public.gmane.org>
@ 2007-07-26 15:35   ` Avi Kivity
       [not found]     ` <46A8BF26.5030802-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  2007-07-31  9:18   ` [PATCH 2/2] KVM: Protect race-condition betweenVMCS " Dong, Eddie
  1 sibling, 1 reply; 9+ messages in thread
From: Avi Kivity @ 2007-07-26 15:35 UTC (permalink / raw)
  To: Gregory Haskins; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Gregory Haskins wrote:
> On Thu, 2007-07-26 at 18:03 +0300, Avi Kivity wrote:
>   
>> Gregory Haskins wrote:
>>     
>>> We need to provide locking around the current_vmcs/VMCS interactions to
>>> protect against race conditions.
>>>
>>>   
>>>       
>> Can you explain the race?
>>     
>
> Sure.  It can happen with two VMs are running simultaneously.  Lets call
> them VM-a and VM-b.  Assume the scenario: VM-a is on CPU-x, gets
> migrated to CPU-y, and VM-b gets scheduled in on CPU-x.  There is a race
> on CPU-x with the VMCS handling logic between the VM-b process context,
> and the IPI to execute the __vcpu_clear for VM-a. 
>
>   

A race indeed, good catch.

I think the race is only on the per_cpu(current_vmcs) variable, no?  The 
actual vmcs ptr (as loaded by vmptrld) is handled by the processor.

> Disabling interrupts was chosen as the sync-primitive, because the code
> will always be on the CPU in question.
>
>   

Looks a bit heavy handed.  How about replacing (in __vcpu_clear())

    if (per_cpu(current_vmcs, cpu) == vcpu->vmcs)
        per_cpu(current_vmcs, cpu) = NULL;

by

    cmpxchg_local(&per_cpu(current_vmcs, cpu), vcpu->vmcs, NULL);

?

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware
@ 2007-07-26 15:40 Gregory Haskins
  0 siblings, 0 replies; 9+ messages in thread
From: Gregory Haskins @ 2007-07-26 15:40 UTC (permalink / raw)
  To: avi-atKUWr5tajBWk0Htik3J/w; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

On Thu, 2007-07-26 at 18:35 +0300, Avi Kivity wrote:

> A race indeed, good catch.
> 
> I think the race is only on the per_cpu(current_vmcs) variable, no?  The 
> actual vmcs ptr (as loaded by vmptrld) is handled by the processor.

Correct.

> 
> > Disabling interrupts was chosen as the sync-primitive, because the code
> > will always be on the CPU in question.
> >
> >   
> 
> Looks a bit heavy handed.  How about replacing (in __vcpu_clear())
> 
>     if (per_cpu(current_vmcs, cpu) == vcpu->vmcs)
>         per_cpu(current_vmcs, cpu) = NULL;
> 
> by
> 
>     cmpxchg_local(&per_cpu(current_vmcs, cpu), vcpu->vmcs, NULL);
> 
> ?

Hmm...possibly.  I've never worked with the cmpxchg subsystem so let me
look into it a little bit and get back to you.



-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware
       [not found]     ` <46A8BF26.5030802-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-07-26 16:31       ` Avi Kivity
  0 siblings, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2007-07-26 16:31 UTC (permalink / raw)
  To: Gregory Haskins; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Avi Kivity wrote:
>>
>> Sure.  It can happen with two VMs are running simultaneously.  Lets call
>> them VM-a and VM-b.  Assume the scenario: VM-a is on CPU-x, gets
>> migrated to CPU-y, and VM-b gets scheduled in on CPU-x.  There is a race
>> on CPU-x with the VMCS handling logic between the VM-b process context,
>> and the IPI to execute the __vcpu_clear for VM-a.
>>   
>
> A race indeed, good catch.
>
> I think the race is only on the per_cpu(current_vmcs) variable, no?  
> The actual vmcs ptr (as loaded by vmptrld) is handled by the processor.

btw, I think the race is benign.  If __vcpu_clear() wins, vcpu_load() 
gets to set current_vmcs and all is well.  If vcpu_load() wins, 
__vcpu_clear() stomps on current_vmcs, but the only effect of that is that 
the next time vcpu_load() is called, it issues an unnecessary vmptrld.

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware
@ 2007-07-26 16:40 Gregory Haskins
  0 siblings, 0 replies; 9+ messages in thread
From: Gregory Haskins @ 2007-07-26 16:40 UTC (permalink / raw)
  To: avi-atKUWr5tajBWk0Htik3J/w; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

On Thu, 2007-07-26 at 19:31 +0300, Avi Kivity wrote:
> Avi Kivity wrote:
> >>
> >> Sure.  It can happen with two VMs are running simultaneously.  Lets call
> >> them VM-a and VM-b.  Assume the scenario: VM-a is on CPU-x, gets
> >> migrated to CPU-y, and VM-b gets scheduled in on CPU-x.  There is a race
> >> on CPU-x with the VMCS handling logic between the VM-b process context,
> >> and the IPI to execute the __vcpu_clear for VM-a.
> >>   
> >
> > A race indeed, good catch.
> >
> > I think the race is only on the per_cpu(current_vmcs) variable, no?  
> > The actual vmcs ptr (as loaded by vmptrld) is handled by the processor.
> 
> btw, I think the race is benign.  if __vcpu_clear() wins, vcpu_load() 
> gets to set current_vmcs and all is well.  If vcpu_load() wins, 
> __vcpu_clear() stomps on current_vmcs, but the only effect of that the 
> next time vcpu_load() is called, it issues an unnecessary vmptrld.


Hmm.. Yes, I think you are right.  When I first started thinking about
this, I thought we needed to VMCLEAR the current VMCS before the
VMPTRLD, in which case this would be a real bug.  But in light of you
setting me straight on that issue, I think this race drops away too.  We
should probably comment the code, just in case current_vmcs gets more
complex in the future, so it doesn't get lost ;)



-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] KVM: Protect race-condition betweenVMCS and current_vmcs on VMX hardware
       [not found] ` <46A882480200005A00028358-Igcdv/6uVdMHoYOw/+koYqIwWpluYiW7@public.gmane.org>
  2007-07-26 15:35   ` Avi Kivity
@ 2007-07-31  9:18   ` Dong, Eddie
       [not found]     ` <10EA09EFD8728347A513008B6B0DA77A01DB6650-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  1 sibling, 1 reply; 9+ messages in thread
From: Dong, Eddie @ 2007-07-31  9:18 UTC (permalink / raw)
  To: Gregory Haskins, avi-atKUWr5tajBWk0Htik3J/w
  Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org wrote:
> On Thu, 2007-07-26 at 18:03 +0300, Avi Kivity wrote:
>> Gregory Haskins wrote:
>>> We need to provide locking around the current_vmcs/VMCS
>>> interactions to protect against race conditions.
>>> 
>>> 
>> 
>> Can you explain the race?
> 
> Sure.  It can happen with two VMs are running simultaneously.
> Lets call
> them VM-a and VM-b.  Assume the scenario: VM-a is on CPU-x, gets
> migrated to CPU-y, and VM-b gets scheduled in on CPU-x.  There
> is a race
> on CPU-x with the VMCS handling logic between the VM-b process
> context, and the IPI to execute the __vcpu_clear for VM-a.

I may be missing something, but why does that matter? __vcpu_clear will
eventually get executed, though it is a little bit delayed. vmclear will
eventually dump the
internal state of VM-a's VMCS to memory and VM-b gets its own VMCS
loaded.  Here the point is that vmclear has a parameter to identify which
VM's VMCS to dump, not only a memory address. Jun, please correct me if
I am wrong.

> 
> Disabling interrupts was chosen as the sync-primitive, because the
> code will always be on the CPU in question.
> 

thx, eddie

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/2] KVM: Protect race-condition betweenVMCS and current_vmcs on VMX hardware
       [not found]     ` <10EA09EFD8728347A513008B6B0DA77A01DB6650-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-07-31  9:22       ` Avi Kivity
  0 siblings, 0 replies; 9+ messages in thread
From: Avi Kivity @ 2007-07-31  9:22 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f

Dong, Eddie wrote:
> kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org wrote:
>   
>> On Thu, 2007-07-26 at 18:03 +0300, Avi Kivity wrote:
>>     
>>> Gregory Haskins wrote:
>>>       
>>>> We need to provide locking around the current_vmcs/VMCS
>>>> interactions to protect against race conditions.
>>>>
>>>>
>>>>         
>>> Can you explain the race?
>>>       
>> Sure.  It can happen with two VMs are running simultaneously.
>> Lets call
>> them VM-a and VM-b.  Assume the scenario: VM-a is on CPU-x, gets
>> migrated to CPU-y, and VM-b gets scheduled in on CPU-x.  There
>> is a race
>> on CPU-x with the VMCS handling logic between the VM-b process
>> context, and the IPI to execute the __vcpu_clear for VM-a.
>>     
>
> I may miss something, why does that matter? __vcpu_clear will eventually
> get executed though it is a little bit delayed. vmclear will eventually
> dump 
> internal state of VM-a VMCS to memory and VM-b get its own VMCS 
> loaded.  Here the point is vmclear has a parameter to identify which
> VM's VMCS to dump, not only a memory address. Jun, please correct me if
> I am wrong.
>
>   

The vmclear instruction itself cannot race (because, as you say, the 
vmcs is a parameter).  However access to the current_vmcs variable is 
racy.  The race is benign and cannot lead to any problems, so we're not 
changing any code for that.


-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2007-07-31  9:22 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-07-26 15:15 [PATCH 2/2] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware Gregory Haskins
     [not found] ` <46A882480200005A00028358-Igcdv/6uVdMHoYOw/+koYqIwWpluYiW7@public.gmane.org>
2007-07-26 15:35   ` Avi Kivity
     [not found]     ` <46A8BF26.5030802-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-07-26 16:31       ` Avi Kivity
2007-07-31  9:18   ` [PATCH 2/2] KVM: Protect race-condition betweenVMCS " Dong, Eddie
     [not found]     ` <10EA09EFD8728347A513008B6B0DA77A01DB6650-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-07-31  9:22       ` Avi Kivity
  -- strict thread matches above, loose matches on Subject: below --
2007-07-26 16:40 [PATCH 2/2] KVM: Protect race-condition between VMCS " Gregory Haskins
2007-07-26 15:40 Gregory Haskins
2007-07-26 14:51 [PATCH 0/2] Arch cleanup v3 Gregory Haskins
     [not found] ` <20070726144602.4847.64724.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org>
2007-07-26 14:52   ` [PATCH 2/2] KVM: Protect race-condition between VMCS and current_vmcs on VMX hardware Gregory Haskins
     [not found]     ` <20070726145210.4847.90637.stgit-sLgBBP33vUGnsjUZhwzVf9HuzzzSOjJt@public.gmane.org>
2007-07-26 15:03       ` Avi Kivity

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox