public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* Remove APIC lock
@ 2007-08-24 13:08 Dong, Eddie
       [not found] ` <10EA09EFD8728347A513008B6B0DA77A01F84594-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Dong, Eddie @ 2007-08-24 13:08 UTC (permalink / raw)
  To: kvm-devel

[-- Attachment #1: Type: text/plain, Size: 8023 bytes --]

Avi:

    apic->lock is used in many places to avoid race conditions with the
apic timer callback
    function, which may run on a different pCPU. This patch migrates the
apic timer to the
    same CPU as the one the VP runs on, thus the lock is no longer
necessary.

thx,eddie

    Signed-off-by: Yaozu (Eddie) Dong <Eddie.Dong-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>


diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index b6283d2..f324cfb 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -106,7 +106,6 @@ struct kvm_ioapic {
 };
 
 struct kvm_lapic {
-	spinlock_t lock;	/* TODO for revise */
 	unsigned long base_address;
 	struct kvm_io_device dev;
 	struct {
@@ -159,5 +158,6 @@ void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu,
int vec);
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 
 #endif
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index 751ec03..d9385a5 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -176,9 +176,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu
*vcpu)
 
 	if (!apic)
 		return 0;
-	spin_lock_bh(&apic->lock);
 	highest_irr = apic_find_highest_irr(apic);
-	spin_unlock_bh(&apic->lock);
 
 	return highest_irr;
 }
@@ -525,9 +523,7 @@ static void apic_mmio_read(struct kvm_io_device
*this,
 		       (unsigned long)address, len);
 		return;
 	}
-	spin_lock_bh(&apic->lock);
 	result = __apic_read(apic, offset & ~0xf);
-	spin_unlock_bh(&apic->lock);
 
 	switch (len) {
 	case 1:
@@ -571,7 +567,6 @@ static void apic_mmio_write(struct kvm_io_device
*this,
 
 	offset &= 0xff0;
 
-	spin_lock_bh(&apic->lock);
 	switch (offset) {
 	case APIC_ID:		/* Local APIC ID */
 		apic_set_reg(apic, APIC_ID, val);
@@ -645,7 +640,6 @@ static void apic_mmio_write(struct kvm_io_device
*this,
 			    APIC_BUS_CYCLE_NS * apic->timer.divide_count
* val;
 
 			/* Make sure the lock ordering is coherent */
-			spin_unlock_bh(&apic->lock);
 			hrtimer_cancel(&apic->timer.dev);
 			atomic_set(&apic->timer.pending, 0);
 			hrtimer_start(&apic->timer.dev,
@@ -687,7 +681,6 @@ static void apic_mmio_write(struct kvm_io_device
*this,
 		break;
 	}
 
-	spin_unlock_bh(&apic->lock);
 }
 
 static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
@@ -695,15 +688,12 @@ static int apic_mmio_range(struct kvm_io_device
*this, gpa_t addr)
 	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
 	int ret = 0;
 
-	spin_lock_bh(&apic->lock);
 
 	if (apic_hw_enabled(apic) &&
 	    (addr >= apic->base_address) &&
 	    (addr < (apic->base_address + LAPIC_MMIO_LENGTH)))
 		ret = 1;
 
-	spin_unlock_bh(&apic->lock);
-
 	return ret;
 }
 
@@ -711,7 +701,6 @@ void kvm_free_apic(struct kvm_lapic *apic)
 {
 	if (!apic)
 		return;
-	spin_lock_bh(&apic->lock);
 
 	hrtimer_cancel(&apic->timer.dev);
 
@@ -720,8 +709,6 @@ void kvm_free_apic(struct kvm_lapic *apic)
 		apic->regs_page = 0;
 	}
 
-	spin_unlock_bh(&apic->lock);
-
 	kfree(apic);
 }
 
@@ -737,9 +724,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu,
unsigned long cr8)
 
 	if (!apic)
 		return;
-	spin_lock_bh(&apic->lock);
 	apic_set_tpr(apic, ((cr8 & 0x0f) << 4));
-	spin_unlock_bh(&apic->lock);
 }
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
@@ -749,9 +734,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 
 	if (!apic)
 		return 0;
-	spin_lock_bh(&apic->lock);
 	tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);
-	spin_unlock_bh(&apic->lock);
 
 	return (tpr & 0xf0) >> 4;
 }
@@ -766,7 +749,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64
value)
 		vcpu->apic_base = value;
 		return;
 	}
-	spin_lock_bh(&apic->lock);
 	if (apic->vcpu->vcpu_id)
 		value &= ~MSR_IA32_APICBASE_BSP;
 
@@ -778,19 +760,11 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64
value)
 	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address
is "
 		   "0x%lx.\n", vcpu->apic_base, apic->base_address);
 
-	spin_unlock_bh(&apic->lock);
 }
 
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
-	u64 base;
-
-	spin_lock_bh(&apic->lock);
-	base = vcpu->apic_base;
-	spin_unlock_bh(&apic->lock);
-
-	return base;
+	return vcpu->apic_base;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
 
@@ -808,8 +782,6 @@ static void lapic_reset(struct kvm_vcpu *vcpu)
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->timer.dev);
 
-	spin_lock_bh(&apic->lock);
-
 	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
 	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
 
@@ -836,8 +808,6 @@ static void lapic_reset(struct kvm_vcpu *vcpu)
 		vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
 	apic_update_ppr(apic);
 
-	spin_unlock_bh(&apic->lock);
-
 	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
 		   "0x%016" PRIx64 ", base_address=0x%0lx.\n",
__FUNCTION__,
 		   vcpu, GET_APIC_ID(apic_get_reg(apic, APIC_ID)),
@@ -851,24 +821,13 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 
 	if (!apic)
 		return 0;
-	spin_lock_bh(&apic->lock);
 	ret = apic_enabled(apic);
-	spin_unlock_bh(&apic->lock);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
 
 /*
-void *kvm_lapic_get_regs(struct kvm_vcpu *vcpu)
-{
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
-	return apic->regs;
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_get_regs);
-*/
-
-/*
 
*----------------------------------------------------------------------
  * timer interface
 
*----------------------------------------------------------------------
@@ -907,9 +866,7 @@ static enum hrtimer_restart apic_timer_fn(struct
hrtimer *data)
 
 	apic = container_of(data, struct kvm_lapic, timer.dev);
 
-	spin_lock_bh(&apic->lock);
 	restart_timer = __apic_timer_fn(apic);
-	spin_unlock_bh(&apic->lock);
 
 	if (restart_timer)
 		return HRTIMER_RESTART;
@@ -929,7 +886,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 		goto nomem;
 
 	vcpu->apic = apic;
-	spin_lock_init(&apic->lock);
 
 	apic->regs_page = alloc_page(GFP_KERNEL);
 	if (apic->regs_page == NULL) {
@@ -1024,7 +980,6 @@ void kvm_apic_post_state_restore(struct kvm_vcpu
*vcpu)
 	apic_update_ppr(apic);
 
 	/* TODO: following code can be in a common API */
-	spin_lock_bh(&apic->lock);
 	hrtimer_cancel(&apic->timer.dev);
 	atomic_set(&apic->timer.pending, 0);
 	val = apic_get_reg(apic, APIC_TDCR);
@@ -1037,5 +992,18 @@ void kvm_apic_post_state_restore(struct kvm_vcpu
*vcpu)
 	hrtimer_start(&apic->timer.dev,
 		      ktime_add_ns(now, offset),
 		      HRTIMER_MODE_ABS);
-	spin_unlock_bh(&apic->lock);
 }
+
+void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->apic;
+	struct hrtimer *timer;
+
+	if (apic) {
+		timer = &apic->timer.dev;
+		hrtimer_cancel(timer);
+		hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_migrate_apic_timer);
+
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index c64fe6d..c584626 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -633,6 +633,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int
cpu)
 		delta = vcpu->host_tsc - tsc_this;
 		svm->vmcb->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
+		kvm_migrate_apic_timer(vcpu);
 	}
 
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index b537104..5ea1132 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -441,8 +441,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu,
int cpu)
 	u64 phys_addr = __pa(vmx->vmcs);
 	u64 tsc_this, delta;
 
-	if (vcpu->cpu != cpu)
+	if (vcpu->cpu != cpu) {
 		vcpu_clear(vmx);
+		kvm_migrate_apic_timer(vcpu);
+	}
 
 	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
 		u8 error;

[-- Attachment #2: 4-lock.patch --]
[-- Type: application/octet-stream, Size: 7656 bytes --]

    apic->lock is used in many places to avoid
    race conditions with the apic timer callback
    function, which may run on a different pCPU.
    This patch migrates the apic timer to the
    same CPU as the one the VP runs on.

    Signed-off-by: Yaozu (Eddie) Dong <Eddie.Dong@intel.com>


diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index b6283d2..f324cfb 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -106,7 +106,6 @@ struct kvm_ioapic {
 };
 
 struct kvm_lapic {
-	spinlock_t lock;	/* TODO for revise */
 	unsigned long base_address;
 	struct kvm_io_device dev;
 	struct {
@@ -159,5 +158,6 @@ void kvm_apic_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
+void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu);
 
 #endif
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index 751ec03..d9385a5 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -176,9 +176,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 
 	if (!apic)
 		return 0;
-	spin_lock_bh(&apic->lock);
 	highest_irr = apic_find_highest_irr(apic);
-	spin_unlock_bh(&apic->lock);
 
 	return highest_irr;
 }
@@ -525,9 +523,7 @@ static void apic_mmio_read(struct kvm_io_device *this,
 		       (unsigned long)address, len);
 		return;
 	}
-	spin_lock_bh(&apic->lock);
 	result = __apic_read(apic, offset & ~0xf);
-	spin_unlock_bh(&apic->lock);
 
 	switch (len) {
 	case 1:
@@ -571,7 +567,6 @@ static void apic_mmio_write(struct kvm_io_device *this,
 
 	offset &= 0xff0;
 
-	spin_lock_bh(&apic->lock);
 	switch (offset) {
 	case APIC_ID:		/* Local APIC ID */
 		apic_set_reg(apic, APIC_ID, val);
@@ -645,7 +640,6 @@ static void apic_mmio_write(struct kvm_io_device *this,
 			    APIC_BUS_CYCLE_NS * apic->timer.divide_count * val;
 
 			/* Make sure the lock ordering is coherent */
-			spin_unlock_bh(&apic->lock);
 			hrtimer_cancel(&apic->timer.dev);
 			atomic_set(&apic->timer.pending, 0);
 			hrtimer_start(&apic->timer.dev,
@@ -687,7 +681,6 @@ static void apic_mmio_write(struct kvm_io_device *this,
 		break;
 	}
 
-	spin_unlock_bh(&apic->lock);
 }
 
 static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
@@ -695,15 +688,12 @@ static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr)
 	struct kvm_lapic *apic = (struct kvm_lapic *)this->private;
 	int ret = 0;
 
-	spin_lock_bh(&apic->lock);
 
 	if (apic_hw_enabled(apic) &&
 	    (addr >= apic->base_address) &&
 	    (addr < (apic->base_address + LAPIC_MMIO_LENGTH)))
 		ret = 1;
 
-	spin_unlock_bh(&apic->lock);
-
 	return ret;
 }
 
@@ -711,7 +701,6 @@ void kvm_free_apic(struct kvm_lapic *apic)
 {
 	if (!apic)
 		return;
-	spin_lock_bh(&apic->lock);
 
 	hrtimer_cancel(&apic->timer.dev);
 
@@ -720,8 +709,6 @@ void kvm_free_apic(struct kvm_lapic *apic)
 		apic->regs_page = 0;
 	}
 
-	spin_unlock_bh(&apic->lock);
-
 	kfree(apic);
 }
 
@@ -737,9 +724,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 
 	if (!apic)
 		return;
-	spin_lock_bh(&apic->lock);
 	apic_set_tpr(apic, ((cr8 & 0x0f) << 4));
-	spin_unlock_bh(&apic->lock);
 }
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
@@ -749,9 +734,7 @@ u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 
 	if (!apic)
 		return 0;
-	spin_lock_bh(&apic->lock);
 	tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);
-	spin_unlock_bh(&apic->lock);
 
 	return (tpr & 0xf0) >> 4;
 }
@@ -766,7 +749,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		vcpu->apic_base = value;
 		return;
 	}
-	spin_lock_bh(&apic->lock);
 	if (apic->vcpu->vcpu_id)
 		value &= ~MSR_IA32_APICBASE_BSP;
 
@@ -778,19 +760,11 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
 		   "0x%lx.\n", vcpu->apic_base, apic->base_address);
 
-	spin_unlock_bh(&apic->lock);
 }
 
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
-	u64 base;
-
-	spin_lock_bh(&apic->lock);
-	base = vcpu->apic_base;
-	spin_unlock_bh(&apic->lock);
-
-	return base;
+	return vcpu->apic_base;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_get_base);
 
@@ -808,8 +782,6 @@ static void lapic_reset(struct kvm_vcpu *vcpu)
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->timer.dev);
 
-	spin_lock_bh(&apic->lock);
-
 	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
 	apic_set_reg(apic, APIC_LVR, APIC_VERSION);
 
@@ -836,8 +808,6 @@ static void lapic_reset(struct kvm_vcpu *vcpu)
 		vcpu->apic_base |= MSR_IA32_APICBASE_BSP;
 	apic_update_ppr(apic);
 
-	spin_unlock_bh(&apic->lock);
-
 	apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr="
 		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __FUNCTION__,
 		   vcpu, GET_APIC_ID(apic_get_reg(apic, APIC_ID)),
@@ -851,24 +821,13 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 
 	if (!apic)
 		return 0;
-	spin_lock_bh(&apic->lock);
 	ret = apic_enabled(apic);
-	spin_unlock_bh(&apic->lock);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_enabled);
 
 /*
-void *kvm_lapic_get_regs(struct kvm_vcpu *vcpu)
-{
-	struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
-	return apic->regs;
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_get_regs);
-*/
-
-/*
  *----------------------------------------------------------------------
  * timer interface
  *----------------------------------------------------------------------
@@ -907,9 +866,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 
 	apic = container_of(data, struct kvm_lapic, timer.dev);
 
-	spin_lock_bh(&apic->lock);
 	restart_timer = __apic_timer_fn(apic);
-	spin_unlock_bh(&apic->lock);
 
 	if (restart_timer)
 		return HRTIMER_RESTART;
@@ -929,7 +886,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 		goto nomem;
 
 	vcpu->apic = apic;
-	spin_lock_init(&apic->lock);
 
 	apic->regs_page = alloc_page(GFP_KERNEL);
 	if (apic->regs_page == NULL) {
@@ -1024,7 +980,6 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 	apic_update_ppr(apic);
 
 	/* TODO: following code can be in a common API */
-	spin_lock_bh(&apic->lock);
 	hrtimer_cancel(&apic->timer.dev);
 	atomic_set(&apic->timer.pending, 0);
 	val = apic_get_reg(apic, APIC_TDCR);
@@ -1037,5 +992,18 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 	hrtimer_start(&apic->timer.dev,
 		      ktime_add_ns(now, offset),
 		      HRTIMER_MODE_ABS);
-	spin_unlock_bh(&apic->lock);
 }
+
+void kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_lapic *apic = vcpu->apic;
+	struct hrtimer *timer;
+
+	if (apic) {
+		timer = &apic->timer.dev;
+		hrtimer_cancel(timer);
+		hrtimer_start(timer, timer->expires, HRTIMER_MODE_ABS);
+	}
+}
+EXPORT_SYMBOL_GPL(kvm_migrate_apic_timer);
+
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index c64fe6d..c584626 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -633,6 +633,7 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		delta = vcpu->host_tsc - tsc_this;
 		svm->vmcb->control.tsc_offset += delta;
 		vcpu->cpu = cpu;
+		kvm_migrate_apic_timer(vcpu);
 	}
 
 	for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index b537104..5ea1132 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -441,8 +441,10 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	u64 phys_addr = __pa(vmx->vmcs);
 	u64 tsc_this, delta;
 
-	if (vcpu->cpu != cpu)
+	if (vcpu->cpu != cpu) {
 		vcpu_clear(vmx);
+		kvm_migrate_apic_timer(vcpu);
+	}
 
 	if (per_cpu(current_vmcs, cpu) != vmx->vmcs) {
 		u8 error;

[-- Attachment #3: Type: text/plain, Size: 315 bytes --]

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

[-- Attachment #4: Type: text/plain, Size: 186 bytes --]

_______________________________________________
kvm-devel mailing list
kvm-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org
https://lists.sourceforge.net/lists/listinfo/kvm-devel

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
@ 2007-08-24 13:47 Gregory Haskins
       [not found] ` <1187963266.4231.0.camel-5CR4LY5GPkvLDviKLk5550HKjMygAv58XqFh9Ls21Oc@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Gregory Haskins @ 2007-08-24 13:47 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm-devel

On Fri, 2007-08-24 at 21:08 +0800, Dong, Eddie wrote:
> Avi:
> 
>     apic->lock is used in many place to avoid race condition with apic
> timer call back
>     function which may run on different pCPU. This patch migrate the
> apic timer to
>     same CPU with the one VP runs on, thus the lock is no longer
> necessary.
> 

What about sources that can inject interrupts besides the timer?  (E.g.
in-kernel PV drivers)


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found] ` <1187963266.4231.0.camel-5CR4LY5GPkvLDviKLk5550HKjMygAv58XqFh9Ls21Oc@public.gmane.org>
@ 2007-08-24 13:55   ` Dong, Eddie
       [not found]     ` <10EA09EFD8728347A513008B6B0DA77A01F845A5-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  2007-08-24 14:39     ` Gregory Haskins
  0 siblings, 2 replies; 15+ messages in thread
From: Dong, Eddie @ 2007-08-24 13:55 UTC (permalink / raw)
  To: Gregory Haskins; +Cc: kvm-devel

Gregory Haskins wrote:
> On Fri, 2007-08-24 at 21:08 +0800, Dong, Eddie wrote:
>> Avi:
>> 
>>     apic->lock is used in many place to avoid race condition with
>>     apic timer call back function which may run on different pCPU.
>>     This patch migrate the apic timer to same CPU with the one VP
>> runs on, thus the lock is no longer necessary. 
>> 
> 
> What about sources that can inject interrupts besides the timer? 
> (E.g. in-kernel PV drivers)

Injecting an IRQ is OK, since it is just an operation on the IRR
register, for which we
can use atomic operations. Xen also does it that way.

Eddie

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found]     ` <10EA09EFD8728347A513008B6B0DA77A01F845A5-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-08-24 14:24       ` Dong, Eddie
  0 siblings, 0 replies; 15+ messages in thread
From: Dong, Eddie @ 2007-08-24 14:24 UTC (permalink / raw)
  To: Dong, Eddie, Gregory Haskins; +Cc: kvm-devel

kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org wrote:
> Gregory Haskins wrote:
>> On Fri, 2007-08-24 at 21:08 +0800, Dong, Eddie wrote:
>>> Avi:
>>> 
>>>     apic->lock is used in many place to avoid race condition with
>>>     apic timer call back function which may run on different pCPU.
>>>     This patch migrate the apic timer to same CPU with the one VP
>>> runs on, thus the lock is no longer necessary.
>>> 
>> 
>> What about sources that can inject interrupts besides the timer?
>> (E.g. in-kernel PV drivers)
> 
> Injecting IRQ is OK, since it is just operation to IRR
> register which we
> can
> use atomic operations. Xen also do in that way.
> 
Oh, I spoke too quickly. Xen has currently evolved to be protected by a
bigger irqlock for both APIC & IOAPIC, and the PIC uses a per-chip lock.

For our case, PIC/IOAPIC now uses kvm->lock. So the APIC is working
with kvm->lock too. But this lock may be too big for the pv driver. We may
need to think of a solution that covers both APIC & IOAPIC in the future.

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
  2007-08-24 13:55   ` Dong, Eddie
       [not found]     ` <10EA09EFD8728347A513008B6B0DA77A01F845A5-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-08-24 14:39     ` Gregory Haskins
  1 sibling, 0 replies; 15+ messages in thread
From: Gregory Haskins @ 2007-08-24 14:39 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm-devel

On Fri, 2007-08-24 at 22:24 +0800, Dong, Eddie wrote:
> kvm-devel-bounces-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f@public.gmane.org wrote:
> > Gregory Haskins wrote:
> >> On Fri, 2007-08-24 at 21:08 +0800, Dong, Eddie wrote:
> >>> Avi:
> >>> 
> >>>     apic->lock is used in many place to avoid race condition with
> >>>     apic timer call back function which may run on different pCPU.
> >>>     This patch migrate the apic timer to same CPU with the one VP
> >>> runs on, thus the lock is no longer necessary.
> >>> 
> >> 
> >> What about sources that can inject interrupts besides the timer?
> >> (E.g. in-kernel PV drivers)
> > 
> > Injecting IRQ is OK, since it is just operation to IRR
> > register which we
> > can
> > use atomic operations. Xen also do in that way.
> > 
> O, said too quick. Xen current has evolved to  be protected by a
> bigger irqlock for both APIC & IOAPIC, and PIC uses per chip lock.
> 
> For our case, PIC/IOAPIC now is using kvm->lock. So APIC is working
> with kvm->lock too. But this lock may be too big for pv driver. We may
> need to think of a solution to cover both APIC & IOAPIC in future.

Yeah, I would highly recommend you make this more fine grained.  For
example, I had a vcpu->irq.lock, and a per-chip lock (e.g. one per apic,
per pic, (and one per ioapic but I never got there)).  Event injection
is a hot-spot, so coarse locking is probably going to cause performance
deficiencies.


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found] ` <10EA09EFD8728347A513008B6B0DA77A01F84594-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-08-25  8:05   ` Avi Kivity
       [not found]     ` <46CFE2C6.5020006-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  2007-09-03 13:48   ` Avi Kivity
  1 sibling, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2007-08-25  8:05 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm-devel

Dong, Eddie wrote:
> Avi:
>
>     apic->lock is used in many place to avoid race condition with apic
> timer call back
>     function which may run on different pCPU. This patch migrate the
> apic timer to
>     same CPU with the one VP runs on, thus the lock is no longer
> necessary.
>
> thx,eddie
>
>   


What about preemption:

- vcpu executes lapic code in qemu process context
- process is preempted
- timer fires, touches lapic code

Furthermore, I question the necessity of this.  Taking a spinlock is a
couple dozen cycles on modern processors.  Entering the guest is a
couple thousand.  So what are we saving?

(migrating the timer is a good thing though).

A different approach might be to wake up the vcpu like we do for irr,
with kvm_vcpu_kick(), and let the timer code be handled in process
context, so no locks need be taken.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found]     ` <46CFE2C6.5020006-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-08-25 14:16       ` Dong, Eddie
       [not found]         ` <10EA09EFD8728347A513008B6B0DA77A01F84646-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Dong, Eddie @ 2007-08-25 14:16 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel

> What about preemption:
> 
> - vcpu executes lapic code in qemu process context

Don't understand. LAPIC is in kernel, how can qemu access?
If you mean qemu is calling APIC KVM syscall, then it already
disabled preemption & take kvm->lock.


> - process is preempted
> - timer fires, touches lapic code

See above.

> 
> Furthermore, I question the necessity of this.  Taking a spinlock is a
> couple dozen cycles on modern processors.  Entering the guest is a
> couple thousand.  So what are we saving?

I am stuck, can you explain more? We didn't enter the guest more than before.

> 
> (migrating the timer is a good thing though).
> 
> A different approach might be to wake up the vcpu like we do for irr,
> with kvm_vcpu_kick(), and let the timer code be handled in process
> context, so no locks need be taken.
> 

This can be done too, but will that be more efficient than timer migration?
Usually the guest APIC timer is around 1ms, which means we have
to VM Exit the guest at 1000HZ, with a cost of thousands of cycles each time.
Meanwhile, process migration can only happen at the scheduler quantum
(10ms?) and highly depends on the scheduler policy. I guess the cost
of hrtimer migration won't exceed 1000 cycles on a modern processor.
And today the migration cost already exceeds hundreds of cycles, so
adding this additional one won't change much. So I think the cost of using
hrtimer migration is much cheaper than kvm_vcpu_kick(), but I may miss
something.


Eddie

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found]         ` <10EA09EFD8728347A513008B6B0DA77A01F84646-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-08-25 14:44           ` Avi Kivity
       [not found]             ` <46D04066.4060206-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2007-08-25 14:44 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm-devel

Dong, Eddie wrote:
>> What about preemption:
>>
>> - vcpu executes lapic code in qemu process context
>>     
>
> Don't understand. LAPIC is in kernel, how can qemu access?
> If you mean qemu is calling APIC KVM syscall, then it already
> disabled preemption & take kvm->lock.
>
>
>   

I meant qemu executes the KVM_VCPU_RUN ioctl.  kvm->lock does not 
disable preemption (it is a mutex).

Do we really take kvm->lock for local accesses?  That's a significant 
problem, much more than the timer.

>> - process is preempted
>> - timer fires, touches lapic code
>>     
>
> See above.
>
>   
>> Furthermore, I question the necessity of this.  Taking a spinlock is a
>> couple dozen cycles on modern processors.  Entering the guest is a
>> couple thousand.  So what are we saving?
>>     
>
> I am stuck, can u explain more? We didn't enter guest more than before.
>
>   

What I'm saying is that the performance improvement is negligible, 
because the VT switch dominates the time.  Taking the lock is fast in 
comparison.

However I do like the simplification that removing the lock brings.

>> (migrating the timer is a good thing though).
>>
>> A different approach might be to wake up the vcpu like we do for irr,
>> with kvm_vcpu_kick(), and let the timer code be handled in process
>> context, so no locks need be taken.
>>
>>     
>
> This can be too, but will that be much efficient than timer migration?
> Usually the guest APIC timer is around 1ms, which means we have
> to VM Exit guest 1000HZ with cost of thousands of cycles each time.
> While the process migration can only happen at scheduler quantum 
> (10ms?) and highly depend on the scheduler policy. I guess the cost
> of hrtimer migration won;t exceed 1000 cycles in modern processor.
> And today the migration cost is already exceeds hundreds of cycles, 
> add this additional one won;t change many. So I think the cost using
> hrtimer migration is much cheap than kvm_vcpu_kick(), but I may miss
> something.
>
>   

I meant in addition to timer migration (I really like the timer 
migration part -- it's much more important than lock removal for 
performance). kvm_vcpu_kick() is needed to wake up from halt, or if we 
have races between the timer and task migration.

I'm thinking about something like the remote tlb flush logic:

- hrtimer callback sets a bit in vcpu->requests and calls kvm_vcpu_kick().
   - if the hrtimer and the vcpu are co-located, kvm_vcpu_kick() does 
nothing
   - if the guest is running on another cpu, it sends an IPI
   - if the vcpu is halted, it wakes it up
- when we reenter the guest again, we examine vcpu->requests; if 
KVM_REQ_LAPIC is set we know the timer expired and we run the lapic code 
(where we can examine the pending count).
- we also need to update hlt sleep to look at vcpu->requests

So, to summarize:
- timer migration always helps, regardless of what we do
- lock removal helps, IMO, not performance, but stability and maintainability
- kvm is preemptible except in vcpu_load() and guest entry, so if we do 
lock removal we need to do everything in task context (nothing in 
interrupts except raising bits like irr)


Opinions?


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found]             ` <46D04066.4060206-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-08-25 15:14               ` Avi Kivity
  2007-08-25 15:25               ` Dong, Eddie
  1 sibling, 0 replies; 15+ messages in thread
From: Avi Kivity @ 2007-08-25 15:14 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm-devel

Avi Kivity wrote:
>
> Do we really take kvm->lock for local accesses?  That's a significant 
> problem, much more than the timer.

Actually, we must take the lock during emulation.  But maybe we can 
change it to a reader/writer lock, and certainly we can drop it during 
lapic access.


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found]             ` <46D04066.4060206-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  2007-08-25 15:14               ` Avi Kivity
@ 2007-08-25 15:25               ` Dong, Eddie
       [not found]                 ` <10EA09EFD8728347A513008B6B0DA77A01F8464D-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  1 sibling, 1 reply; 15+ messages in thread
From: Dong, Eddie @ 2007-08-25 15:25 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel

Avi Kivity wrote:
> Dong, Eddie wrote:
>>> What about preemption:
>>> 
>>> - vcpu executes lapic code in qemu process context
>>> 
>> 
>> Don't understand. LAPIC is in kernel, how can qemu access?
>> If you mean qemu is calling APIC KVM syscall, then it already
>> disabled preemption & take kvm->lock.
>> 
>> 
>> 
> 
> I meant qemu executes the KVM_VCPU_RUN ioctl.  kvm->lock does not
> disable preemption (it is a mutex).

Just noticed it is changed to mutex, but seems same here :-)
If the process is switched to other task, it is OK since it won't access
local
APIC. Current VP access to APIC will take the mutex first (see below).
Or you are talking other corner case?

> 
> Do we really take kvm->lock for local accesses?  That's a significant
> problem, much more than the timer.

Today all APIC/IOAPIC access comes from shadow page fault which already
take kvm->lock. KVM_IRQ_LINE will take too. (just noticed the
save/restore part
missed this one, will add later if we agree here). PIC access comes from
kernel_pio which takes the mutex too.

Another missing place is vmx_intr_assist which needs to take the mutex
too.
Will add later.

> 
>>> - process is preempted
>>> - timer fires, touches lapic code
>>> 
>> 
>> See above.
>> 
>> 
>>> Furthermore, I question the necessity of this.  Taking a spinlock
>>> is a couple dozen cycles on modern processors.  Entering the guest
>>> is a couple thousand.  So what are we saving?
>>> 
>> 
>> I am stuck, can u explain more? We didn't enter guest more than
>> before. 
>> 
>> 
> 
> What I'm saying is that the performance improvement is negligible,
> because the VT switch dominates the time.  Taking the lock is fast in
> comparison. 
> 
> However I do like the simplification that removing the lock brings.

Yes, I do this for simplification only, I didn't expect any performance
difference.
I met recursive lock taking case in debug, so just want to revisit it
now.

> 
>>> (migrating the timer is a good thing though).
>>> 
>>> A different approach might be to wake up the vcpu like we do for
>>> irr, with kvm_vcpu_kick(), and let the timer code be handled in
>>> process context, so no locks need be taken.
>>> 
>>> 
>> 
>> This can be too, but will that be much more efficient than timer
>> migration? Usually the guest APIC timer is around 1ms, which means
>> we have 
>> to VM Exit guest 1000HZ with cost of thousands of cycles each time.
>> While the process migration can only happen at scheduler quantum
>> (10ms?) and highly depend on the scheduler policy. I guess the cost
>> of hrtimer migration won't exceed 1000 cycles in modern processor.
>> And today the migration cost already exceeds hundreds of cycles,
>> add this additional one won't change much. So I think the cost using
>> hrtimer migration is much cheaper than kvm_vcpu_kick(), but I may miss
>> something. 
>> 
>> 
> 
> I meant in addition to timer migration (I really like the timer
> migration part -- it's much more important than lock removal for
> performance). kvm_vcpu_kick() is needed to wake up from halt, or if we
> have races between the timer and task migration.

:-)  Actually we have solved this issue in previous patch and this one
naturally.
In adding back APIC timer IRQ patch, we will wakeup the halt vCPU.

In this patch since hrtimer always run in same pCPU with guest VP
(when VP is active), each time when hrtime fires (comes from a hardware
IRQ), 
it already VM Exit to kernel (similar function with kvm_vcpu_kick but 
no need to explicitly call it) and then we do IRQ injection at
vmx_intr_assist time.

> 
> I'm thinking about something like the remote tlb flush logic:
> 
> - hrtimer callback sets a bit in vcpu->requests and calls
> kvm_vcpu_kick(). 
>   - if the hrtimer and the vcpu are co-located, kvm_vcpu_kick() does
> nothing 
>   - if the guest is running on another cpu, it sends an IPI
>   - if the vcpu is halted, it wakes it up
> - when we reenter the guest again, we examine vcpu->requests; if
> KVM_REQ_LAPIC is set we know the timer expired and we run the
> lapic code
> (where we can examine the pending count).
> - we also need to update hlt sleep to look at vcpu->requests
> 
> So, to summarize:
> - timer migration always helps, regardless of what we do
> - lock removal helps, IMO, not performance, but stability and
> maintainability 
> - kvm is preemptible except in vcpu_load() and guest entry, so
> if we do
> lock removal we need to do everything in task context (nothing in
> interrupts except raising bits like irr)
> 
> 
> Opinions?

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found]                 ` <10EA09EFD8728347A513008B6B0DA77A01F8464D-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-08-25 15:54                   ` Avi Kivity
       [not found]                     ` <46D050AB.2000900-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2007-08-25 15:54 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm-devel

Dong, Eddie wrote:
> Avi Kivity wrote:
>   
>> Dong, Eddie wrote:
>>     
>>>> What about preemption:
>>>>
>>>> - vcpu executes lapic code in qemu process context
>>>>
>>>>         
>>> Don't understand. LAPIC is in kernel, how can qemu access?
>>> If you mean qemu is calling APIC KVM syscall, then it already
>>> disabled preemption & take kvm->lock.
>>>
>>>
>>>
>>>       
>> I meant qemu executes the KVM_VCPU_RUN ioctl.  kvm->lock does not
>> disable preemption (it is a mutex).
>>     
>
> Just noticed it is changed to mutex, but seems same here :-)
> If the process is switched to other task, it is OK since it won't access
> local
> APIC. Current VP access to APIC will take the mutex first (see below).
> Or you are talking other corner case?
>
>   

apic access from process context is protected by kvm->lock, but apic
access from hrtimer is not.  Consider this scenario:

- guest accesses apic
- apic code starts modifying apic data
<preemption>
- timer fires
- apic_timer_fn() corrupts apic data

(I'm not even sure preemption is required here)

I think that in Xen this can't happen because it is not preemptible and
timers are processed when exiting back to the guest.



>> Do we really take kvm->lock for local accesses?  That's a significant
>> problem, much more than the timer.
>>     
>
> Today all APIC/IOAPIC access comes from shadow page fault which already
> take kvm->lock. KVM_IRQ_LINE will take too. (just noticed the
> save/restore part
> missed this one, will add later if we agree here). PIC access comes from
> kernel_pio which takes the mutex too.
>
> Another missing place is vmx_intr_assist which needs to take the mutex
> too.
> Will add later.
>
>   

The apic can be protected by vcpu->mutex, platform-wide things (pic,
ioapic) should be protected by kvm->lock.  This will work if we move all
apic processing to process context like I proposed in a previous mail.


>> I meant in addition to timer migration (I really like the timer
>> migration part -- it's much more important than lock removal for
>> performance). kvm_vcpu_kick() is needed to wake up from halt, or if we
>> have races between the timer and task migration.
>>     
>
> :-)  Actually we have solved this issue in previous patch and this one
> naturally.
> In adding back APIC timer IRQ patch, we will wakeup the halt vCPU.
>
> In this patch since hrtimer always run in same pCPU with guest VP
> (when VP is active), each time when hrtime fires (comes from a hardware
> IRQ), 
> it already VM Exit to kernel (similar function with kvm_vcpu_kick but 
> no need to explicitly call it) and then we do IRQ injection at
> vmx_intr_assist time.
>   

Yes, the two solutions are very similar.  But I think mine protects
against a race:

- scheduler starts migrating vcpu from cpu 0 to cpu 1
- hrtimer fires on cpu 0, but apic_timer_fn not called yet
- vcpu on cpu 1 migrates the hrtimer
- vcpu enters guest mode on cpu 1
- cpu 0 calls apic_timer_fn

In this case, there will be no wakeup.  So I think you do need to call
kvm_vcpu_kick() which will usually do nothing.

We also need to make sure all the non atomic code in __apic_timer_fn()
is executed in process context (it can use the pending count to decide
how much to add).

So I think there are three separate issues here:
- hrtimer migration: it helps performance, but doesn't help locking
- changing __apic_timer_fn() to only do atomic operations, and do the
nonatomic operations in process context under vcpu->mutex
- remove the apic lock

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to panic.


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found]                     ` <46D050AB.2000900-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-08-26 23:41                       ` Dong, Eddie
       [not found]                         ` <10EA09EFD8728347A513008B6B0DA77A01F846D6-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Dong, Eddie @ 2007-08-26 23:41 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel

Avi Kivity wrote:
>> 
>> Just noticed it is changed to mutex, but seems same here :-)
>> If the process is switched to other task, it is OK since it won't
>> access local APIC. Current VP access to APIC will take the mutex
>> first (see below). Or you are talking other corner case?
>> 
>> 
> 
> apic access from process context is protected by kvm->lock, but apic
> access from hrtimer is not.  Consider this scenario:
> 
> - guest accesses apic
> - apic code starts modifying apic data
> <preemption>
> - timer fires
> - apic_timer_fn() corrupts apic data
> 
> (I'm not even sure preemption is required here)
> 
> I think that in Xen this can't happen because it is not preemptible
> and timers are processed when exiting back to the guest.

For this situation, even without preemption, the problem is still there.
But maybe you are referring to the old code; the latest code is already
preemption free since the apic_timer_fn didn't change any APIC 
state. It only increases apic->timer.pending.

When guest change/disable APIC time configuration, the hrtimer must be
canceled first so that no race with old hrtimer.

> 
> 
> 
>> 
>> 
> 
> The apic can be protected by vcpu->mutex, platform-wide things (pic,
> ioapic) should be protected by kvm->lock.  This will work if
> we move all
> apic processing to process context like I proposed in a previous mail.
> 
> 
>> 
>> In this patch since hrtimer always run in same pCPU with guest VP
>> (when VP is active), each time when hrtime fires (comes from a
>> hardware IRQ), it already VM Exit to kernel (similar function with
>> kvm_vcpu_kick but no need to explicitly call it) and then we do IRQ
>> injection at vmx_intr_assist time. 
>> 
> 
> Yes, the two solutions are very similar.  But I think mine protects
> against a race: 
> 
> - scheduler starts migrating vcpu from cpu 0 to cpu 1
> - hrtimer fires on cpu 0, but apic_timer_fn not called yet
> - vcpu on cpu 1 migrates the hrtimer

When CPU 1 does the hrtimer migration, hrtimer_cancel will wait for
an in-flight timer to complete and then remove it. See
hrtimer_try_to_cancel.

> - vcpu enters guest mode on cpu 1
> - cpu 0 calls apic_timer_fn

The timer is either already canceled or be done before above migration
completion.

> 
> In this case, there will be no wakeup.  So I think you do need to call
> kvm_vcpu_kick() which will usually do nothing.
> 
> We also need to make sure all the non atomic code in __apic_timer_fn()
> is executed in process context (it can use the pending count to
> decide how much to add). 

I think today it is all atomic ops there. maybe I missed something.

> 
> So I think there are three separate issues here:
> - hrtimer migration: it helps performance, but doesn't help locking
> - changing __apic_timer_fn() to only do atomic operations, and do the
> nonatomic operations in process context under vcpu->mutex - remove
> the apic lock 
> 
thx,eddie

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found]                         ` <10EA09EFD8728347A513008B6B0DA77A01F846D6-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
@ 2007-08-27  0:30                           ` Avi Kivity
       [not found]                             ` <46D21B37.8030106-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
  0 siblings, 1 reply; 15+ messages in thread
From: Avi Kivity @ 2007-08-27  0:30 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm-devel

Dong, Eddie wrote:
> Avi Kivity wrote:
>   
>>> Just noticed it is changed to mutex, but seems same here :-)
>>> If the process is switched to other task, it is OK since it won't
>>> access local APIC. Current VP access to APIC will take the mutex
>>> first (see below). Or you are talking other corner case?
>>>
>>>
>>>       
>> apic access from process context is protected by kvm->lock, but apic
>> access from hrtimer is not.  Consider this scenario:
>>
>> - guest accesses apic
>> - apic code starts modifying apic data
>> <preemption>
>> - timer fires
>> - apic_timer_fn() corrupts apic data
>>
>> (I'm not even sure preemption is required here)
>>
>> I think that in Xen this can't happen because it is not preemptible
>> and timers are processed when exiting back to the guest.
>>     
>
> For this situation, even without preemption, the problem is still there.
> But maybe you are referring to the old code; the latest code is already
> preemption free since the apic_timer_fn didn't change any APIC 
> state. It only increases apic->timer.pending.
>
>   

I see the following:

> static int __apic_timer_fn(struct kvm_lapic *apic)
> {
>         int result = 0;
>         wait_queue_head_t *q = &apic->vcpu->wq;
>
>         atomic_inc(&apic->timer.pending);
>         if (waitqueue_active(q))
>                 wake_up_interruptible(q);
>         if (apic_lvtt_period(apic)) {
>                 result = 1;
>                 apic->timer.dev.expires = ktime_add_ns(
>                                         apic->timer.dev.expires,
>                                         apic->timer.period);
>         }
>         return result;
> }

So, timer.dev.expires is protected by hrtimer internal locking?

Tricky, but it should work.

>>>       
>> The apic can be protected by vcpu->mutex, platform-wide things (pic,
>> ioapic) should be protected by kvm->lock.  This will work if
>> we move all
>> apic processing to process context like I proposed in a previous mail.
>>
>>
>>     
>>> In this patch since hrtimer always run in same pCPU with guest VP
>>> (when VP is active), each time when hrtime fires (comes from a
>>> hardware IRQ), it already VM Exit to kernel (similar function with
>>> kvm_vcpu_kick but no need to explicitly call it) and then we do IRQ
>>> injection at vmx_intr_assist time. 
>>>
>>>       
>> Yes, the two solutions are very similar.  But I think mine protects
>> against a race: 
>>
>> - scheduler starts migrating vcpu from cpu 0 to cpu 1
>> - hrtimer fires on cpu 0, but apic_timer_fn not called yet
>> - vcpu on cpu 1 migrates the hrtimer
>>     
>
> When CPU 1 does the hrtimer migration, hrtimer_cancel will wait for
> an in-flight timer to complete and then remove it. See
> hrtimer_try_to_cancel.
>   

Okay.

I'm satisfied that it's safe now.  I'll add some comments later and commit.

-- 
Any sufficiently difficult bug is indistinguishable from a feature.


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found]                             ` <46D21B37.8030106-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
@ 2007-08-27  1:54                               ` Dong, Eddie
  0 siblings, 0 replies; 15+ messages in thread
From: Dong, Eddie @ 2007-08-27  1:54 UTC (permalink / raw)
  To: Avi Kivity; +Cc: kvm-devel

Avi Kivity wrote:
> Dong, Eddie wrote:
>> Avi Kivity wrote:
>> 
>> For this situation, even without preemption, the problem is still
>> there. But maybe you are referring to the old code; the latest code is
>> already preemption free since the apic_timer_fn didn't change any
>> APIC state. It only increases apic->timer.pending.
>> 
>> 
> 
> I see the following:
> 
>> static int __apic_timer_fn(struct kvm_lapic *apic)
>> {
>>         int result = 0;
>>         wait_queue_head_t *q = &apic->vcpu->wq;
>> 
>>         atomic_inc(&apic->timer.pending);
>>         if (waitqueue_active(q))
>>                 wake_up_interruptible(q);
>>         if (apic_lvtt_period(apic)) {
>>                 result = 1;
>>                 apic->timer.dev.expires = ktime_add_ns(
>>                                         apic->timer.dev.expires,
>>                                         apic->timer.period);        
>>         } return result;
>> }
> 
> So, timer.dev.expires is protected by hrtimer internal locking?

Not sure, but apic->timer.dev.expires is only used in the call back
function.
So we are ok here.

> 
> Tricky, but it should work.
>> 
>> When CPU 1 do hrtimer migration, hrtimer_cancel will wait for
>> an in-flying timer be completed and then remove it. see
>> hrtimer_try_to_cancel. 
>> 
> 
> Okay.
> 
> I'm satisfied that it's safe now.  I'll add some comments
> later and commit.
> 

Thanks, I will try to fix the potential leak hole, i.e. vmx_intr_assist
need to take the mutex of both PIC & APIC.

For the issue Greg raised, i.e. for in kernel pv driver IRQ injection, 
probably we can leave to Greg and Dor who are working on pv drivers :-)
kvm->lock and vcpu->mutex both are too big, but current apic->lock
(before
this patch) is too small and didn't solve the whole problem.

Thx, Eddie

-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: Remove APIC lock
       [not found] ` <10EA09EFD8728347A513008B6B0DA77A01F84594-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
  2007-08-25  8:05   ` Avi Kivity
@ 2007-09-03 13:48   ` Avi Kivity
  1 sibling, 0 replies; 15+ messages in thread
From: Avi Kivity @ 2007-09-03 13:48 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: kvm-devel

Dong, Eddie wrote:
> Avi:
>
>     apic->lock is used in many place to avoid race condition with apic
> timer call back
>     function which may run on different pCPU. This patch migrate the
> apic timer to
>     same CPU with the one VP runs on, thus the lock is no longer
> necessary.
>
> thx,eddie
>
>   

This is now applied into a new branch lapic6.  I split into removal of 
the lock, removal of kvm_lapic_get_regs(), and timer migration.  
Eventually the first two will be folded into preceding patches.  Please 
separate patches in the future.

-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems?  Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >>  http://get.splunk.com/

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2007-09-03 13:48 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-08-24 13:08 Remove APIC lock Dong, Eddie
     [not found] ` <10EA09EFD8728347A513008B6B0DA77A01F84594-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-08-25  8:05   ` Avi Kivity
     [not found]     ` <46CFE2C6.5020006-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-08-25 14:16       ` Dong, Eddie
     [not found]         ` <10EA09EFD8728347A513008B6B0DA77A01F84646-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-08-25 14:44           ` Avi Kivity
     [not found]             ` <46D04066.4060206-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-08-25 15:14               ` Avi Kivity
2007-08-25 15:25               ` Dong, Eddie
     [not found]                 ` <10EA09EFD8728347A513008B6B0DA77A01F8464D-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-08-25 15:54                   ` Avi Kivity
     [not found]                     ` <46D050AB.2000900-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-08-26 23:41                       ` Dong, Eddie
     [not found]                         ` <10EA09EFD8728347A513008B6B0DA77A01F846D6-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-08-27  0:30                           ` Avi Kivity
     [not found]                             ` <46D21B37.8030106-atKUWr5tajBWk0Htik3J/w@public.gmane.org>
2007-08-27  1:54                               ` Dong, Eddie
2007-09-03 13:48   ` Avi Kivity
  -- strict thread matches above, loose matches on Subject: below --
2007-08-24 13:47 Gregory Haskins
     [not found] ` <1187963266.4231.0.camel-5CR4LY5GPkvLDviKLk5550HKjMygAv58XqFh9Ls21Oc@public.gmane.org>
2007-08-24 13:55   ` Dong, Eddie
     [not found]     ` <10EA09EFD8728347A513008B6B0DA77A01F845A5-wq7ZOvIWXbNpB2pF5aRoyrfspsVTdybXVpNB7YpNyf8@public.gmane.org>
2007-08-24 14:24       ` Dong, Eddie
2007-08-24 14:39     ` Gregory Haskins

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox