public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
* [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
@ 2010-02-17 14:00 Thomas Gleixner
  2010-02-18  9:12 ` Jan Kiszka
                   ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Thomas Gleixner @ 2010-02-17 14:00 UTC (permalink / raw)
  To: KVM; +Cc: Avi Kivity

[-- Attachment #1: 0129.patch --]
[-- Type: text/plain, Size: 6832 bytes --]

The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
them to raw_spinlock. No change for !RT kernels.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 arch/x86/kvm/i8254.c |   10 +++++-----
 arch/x86/kvm/i8254.h |    2 +-
 arch/x86/kvm/i8259.c |   31 ++++++++++++++++---------------
 arch/x86/kvm/irq.h   |    2 +-
 arch/x86/kvm/x86.c   |    8 ++++----
 5 files changed, 27 insertions(+), 26 deletions(-)

Index: linux-2.6-tip/arch/x86/kvm/i8254.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kvm/i8254.c
+++ linux-2.6-tip/arch/x86/kvm/i8254.c
@@ -242,11 +242,11 @@ static void kvm_pit_ack_irq(struct kvm_i
 {
 	struct kvm_kpit_state *ps = container_of(kian, struct kvm_kpit_state,
 						 irq_ack_notifier);
-	spin_lock(&ps->inject_lock);
+	raw_spin_lock(&ps->inject_lock);
 	if (atomic_dec_return(&ps->pit_timer.pending) < 0)
 		atomic_inc(&ps->pit_timer.pending);
 	ps->irq_ack = 1;
-	spin_unlock(&ps->inject_lock);
+	raw_spin_unlock(&ps->inject_lock);
 }
 
 void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
@@ -624,7 +624,7 @@ struct kvm_pit *kvm_create_pit(struct kv
 
 	mutex_init(&pit->pit_state.lock);
 	mutex_lock(&pit->pit_state.lock);
-	spin_lock_init(&pit->pit_state.inject_lock);
+	raw_spin_lock_init(&pit->pit_state.inject_lock);
 
 	kvm->arch.vpit = pit;
 	pit->kvm = kvm;
@@ -723,12 +723,12 @@ void kvm_inject_pit_timer_irqs(struct kv
 		/* Try to inject pending interrupts when
 		 * last one has been acked.
 		 */
-		spin_lock(&ps->inject_lock);
+		raw_spin_lock(&ps->inject_lock);
 		if (atomic_read(&ps->pit_timer.pending) && ps->irq_ack) {
 			ps->irq_ack = 0;
 			inject = 1;
 		}
-		spin_unlock(&ps->inject_lock);
+		raw_spin_unlock(&ps->inject_lock);
 		if (inject)
 			__inject_pit_timer_intr(kvm);
 	}
Index: linux-2.6-tip/arch/x86/kvm/i8254.h
===================================================================
--- linux-2.6-tip.orig/arch/x86/kvm/i8254.h
+++ linux-2.6-tip/arch/x86/kvm/i8254.h
@@ -27,7 +27,7 @@ struct kvm_kpit_state {
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
-	spinlock_t inject_lock;
+	raw_spinlock_t inject_lock;
 	unsigned long irq_ack;
 	struct kvm_irq_ack_notifier irq_ack_notifier;
 };
Index: linux-2.6-tip/arch/x86/kvm/i8259.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kvm/i8259.c
+++ linux-2.6-tip/arch/x86/kvm/i8259.c
@@ -44,18 +44,19 @@ static void pic_clear_isr(struct kvm_kpi
 	 * Other interrupt may be delivered to PIC while lock is dropped but
 	 * it should be safe since PIC state is already updated at this stage.
 	 */
-	spin_unlock(&s->pics_state->lock);
+	raw_spin_unlock(&s->pics_state->lock);
 	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
-	spin_lock(&s->pics_state->lock);
+	raw_spin_lock(&s->pics_state->lock);
 }
 
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
 {
 	struct kvm_pic *s = pic_irqchip(kvm);
-	spin_lock(&s->lock);
+
+	raw_spin_lock(&s->lock);
 	s->pics[0].isr_ack = 0xff;
 	s->pics[1].isr_ack = 0xff;
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 }
 
 /*
@@ -156,9 +157,9 @@ static void pic_update_irq(struct kvm_pi
 
 void kvm_pic_update_irq(struct kvm_pic *s)
 {
-	spin_lock(&s->lock);
+	raw_spin_lock(&s->lock);
 	pic_update_irq(s);
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 }
 
 int kvm_pic_set_irq(void *opaque, int irq, int level)
@@ -166,14 +167,14 @@ int kvm_pic_set_irq(void *opaque, int ir
 	struct kvm_pic *s = opaque;
 	int ret = -1;
 
-	spin_lock(&s->lock);
+	raw_spin_lock(&s->lock);
 	if (irq >= 0 && irq < PIC_NUM_PINS) {
 		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
 		pic_update_irq(s);
 		trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
 				      s->pics[irq >> 3].imr, ret == 0);
 	}
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 
 	return ret;
 }
@@ -203,7 +204,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 	int irq, irq2, intno;
 	struct kvm_pic *s = pic_irqchip(kvm);
 
-	spin_lock(&s->lock);
+	raw_spin_lock(&s->lock);
 	irq = pic_get_irq(&s->pics[0]);
 	if (irq >= 0) {
 		pic_intack(&s->pics[0], irq);
@@ -228,7 +229,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 		intno = s->pics[0].irq_base + irq;
 	}
 	pic_update_irq(s);
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 
 	return intno;
 }
@@ -442,7 +443,7 @@ static int picdev_write(struct kvm_io_de
 			printk(KERN_ERR "PIC: non byte write\n");
 		return 0;
 	}
-	spin_lock(&s->lock);
+	raw_spin_lock(&s->lock);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -455,7 +456,7 @@ static int picdev_write(struct kvm_io_de
 		elcr_ioport_write(&s->pics[addr & 1], addr, data);
 		break;
 	}
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 	return 0;
 }
 
@@ -472,7 +473,7 @@ static int picdev_read(struct kvm_io_dev
 			printk(KERN_ERR "PIC: non byte read\n");
 		return 0;
 	}
-	spin_lock(&s->lock);
+	raw_spin_lock(&s->lock);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -486,7 +487,7 @@ static int picdev_read(struct kvm_io_dev
 		break;
 	}
 	*(unsigned char *)val = data;
-	spin_unlock(&s->lock);
+	raw_spin_unlock(&s->lock);
 	return 0;
 }
 
@@ -520,7 +521,7 @@ struct kvm_pic *kvm_create_pic(struct kv
 	s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL);
 	if (!s)
 		return NULL;
-	spin_lock_init(&s->lock);
+	raw_spin_lock_init(&s->lock);
 	s->kvm = kvm;
 	s->pics[0].elcr_mask = 0xf8;
 	s->pics[1].elcr_mask = 0xde;
Index: linux-2.6-tip/arch/x86/kvm/irq.h
===================================================================
--- linux-2.6-tip.orig/arch/x86/kvm/irq.h
+++ linux-2.6-tip/arch/x86/kvm/irq.h
@@ -62,7 +62,7 @@ struct kvm_kpic_state {
 };
 
 struct kvm_pic {
-	spinlock_t lock;
+	raw_spinlock_t lock;
 	unsigned pending_acks;
 	struct kvm *kvm;
 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
Index: linux-2.6-tip/arch/x86/kvm/x86.c
===================================================================
--- linux-2.6-tip.orig/arch/x86/kvm/x86.c
+++ linux-2.6-tip/arch/x86/kvm/x86.c
@@ -2273,18 +2273,18 @@ static int kvm_vm_ioctl_set_irqchip(stru
 	r = 0;
 	switch (chip->chip_id) {
 	case KVM_IRQCHIP_PIC_MASTER:
-		spin_lock(&pic_irqchip(kvm)->lock);
+		raw_spin_lock(&pic_irqchip(kvm)->lock);
 		memcpy(&pic_irqchip(kvm)->pics[0],
 			&chip->chip.pic,
 			sizeof(struct kvm_pic_state));
-		spin_unlock(&pic_irqchip(kvm)->lock);
+		raw_spin_unlock(&pic_irqchip(kvm)->lock);
 		break;
 	case KVM_IRQCHIP_PIC_SLAVE:
-		spin_lock(&pic_irqchip(kvm)->lock);
+		raw_spin_lock(&pic_irqchip(kvm)->lock);
 		memcpy(&pic_irqchip(kvm)->pics[1],
 			&chip->chip.pic,
 			sizeof(struct kvm_pic_state));
-		spin_unlock(&pic_irqchip(kvm)->lock);
+		raw_spin_unlock(&pic_irqchip(kvm)->lock);
 		break;
 	case KVM_IRQCHIP_IOAPIC:
 		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);



^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-17 14:00 [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks Thomas Gleixner
@ 2010-02-18  9:12 ` Jan Kiszka
  2010-02-18  9:20   ` Avi Kivity
  2010-02-18  9:19 ` Avi Kivity
  2010-02-23 19:18 ` Jan Kiszka
  2 siblings, 1 reply; 26+ messages in thread
From: Jan Kiszka @ 2010-02-18  9:12 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: KVM, Avi Kivity

Thomas Gleixner wrote:
> The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
> them to raw_spinlock. No change for !RT kernels.

Last time I ran KVM over RT, I also had to convert requests_lock (struct
kvm): make_all_cpus_request assumes that this lock prevents migration.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-17 14:00 [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks Thomas Gleixner
  2010-02-18  9:12 ` Jan Kiszka
@ 2010-02-18  9:19 ` Avi Kivity
  2010-02-23 19:18 ` Jan Kiszka
  2 siblings, 0 replies; 26+ messages in thread
From: Avi Kivity @ 2010-02-18  9:19 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: KVM

On 02/17/2010 04:00 PM, Thomas Gleixner wrote:
> The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
> them to raw_spinlock. No change for !RT kernels.
>    

Applied, thanks.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-18  9:12 ` Jan Kiszka
@ 2010-02-18  9:20   ` Avi Kivity
  2010-02-18  9:40     ` Jan Kiszka
  0 siblings, 1 reply; 26+ messages in thread
From: Avi Kivity @ 2010-02-18  9:20 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Thomas Gleixner, KVM

On 02/18/2010 11:12 AM, Jan Kiszka wrote:
> Thomas Gleixner wrote:
>    
>> The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
>> them to raw_spinlock. No change for !RT kernels.
>>      
> Last time I ran KVM over RT, I also had to convert requests_lock (struct
> kvm): make_all_cpus_request assumes that this lock prevents migration.
>
>    

True.  Will commit something to that effect.

Meanwhile, if anyone has any idea how to kill this lock, I'd love to see it.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-18  9:20   ` Avi Kivity
@ 2010-02-18  9:40     ` Jan Kiszka
  2010-02-18  9:45       ` Avi Kivity
  0 siblings, 1 reply; 26+ messages in thread
From: Jan Kiszka @ 2010-02-18  9:40 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Thomas Gleixner, KVM

Avi Kivity wrote:
> On 02/18/2010 11:12 AM, Jan Kiszka wrote:
>> Thomas Gleixner wrote:
>>    
>>> The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
>>> them to raw_spinlock. No change for !RT kernels.
>>>      
>> Last time I ran KVM over RT, I also had to convert requests_lock (struct
>> kvm): make_all_cpus_request assumes that this lock prevents migration.
>>
>>    
> 
> True.  Will commit something to that effect.
> 
> Meanwhile, if anyone has any idea how to kill this lock, I'd love to see it.
> 

What concurrency does it resolve in the end? On first glance, it only
> synchronize the fiddling with per-VCPU request bits, right? What forces
us to do this? Wouldn't it suffice to disable preemption (thus
migration) and the let concurrent requests race for setting the bits? I
mean if some request bit was already set on entry, we don't include the
 related VCPU in smp_call_function_many anyway.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-18  9:40     ` Jan Kiszka
@ 2010-02-18  9:45       ` Avi Kivity
  2010-02-18  9:49         ` Jan Kiszka
  2010-02-18  9:50         ` Avi Kivity
  0 siblings, 2 replies; 26+ messages in thread
From: Avi Kivity @ 2010-02-18  9:45 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Thomas Gleixner, KVM

On 02/18/2010 11:40 AM, Jan Kiszka wrote:
>
>> Meanwhile, if anyone has any idea how to kill this lock, I'd love to see it.
>>
>>      
> What concurrency does it resolve in the end? On first glance, it only
> synchronize the fiddling with pre-VCPU request bits, right? What forces
> us to do this? Wouldn't it suffice to disable preemption (thus
> migration) and the let concurrent requests race for setting the bits? I
> mean if some request bit was already set on entry, we don't include the
>   related VCPU in smp_call_function_many anyway.
>    

It's more difficult.

vcpu 0: sets request bit on vcpu 2
           vcpu 1: test_and_set request bit on vcpu 2, returns already set
           vcpu 1: returns
vcpu 0: sends IPI
vcpu 0: returns

so vcpu 1 returns before the IPI was performed.  If the request was a 
tlb flush, for example, vcpu 1 may free a page that is still in vcpu 2's 
tlb.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-18  9:45       ` Avi Kivity
@ 2010-02-18  9:49         ` Jan Kiszka
  2010-02-18  9:53           ` Avi Kivity
  2010-02-18  9:50         ` Avi Kivity
  1 sibling, 1 reply; 26+ messages in thread
From: Jan Kiszka @ 2010-02-18  9:49 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Thomas Gleixner, KVM

Avi Kivity wrote:
> On 02/18/2010 11:40 AM, Jan Kiszka wrote:
>>> Meanwhile, if anyone has any idea how to kill this lock, I'd love to see it.
>>>
>>>      
>> What concurrency does it resolve in the end? On first glance, it only
>> synchronize the fiddling with pre-VCPU request bits, right? What forces
>> us to do this? Wouldn't it suffice to disable preemption (thus
>> migration) and the let concurrent requests race for setting the bits? I
>> mean if some request bit was already set on entry, we don't include the
>>   related VCPU in smp_call_function_many anyway.
>>    
> 
> It's more difficult.
> 
> vcpu 0: sets request bit on vcpu 2
>            vcpu 1: test_and_set request bit on vcpu 2, returns already set
>            vcpu 1: returns
> vcpu 0: sends IPI
> vcpu 0: returns
> 
> so vcpu 1 returns before the IPI was performed.  If the request was a 
> tlb flush, for example, vcpu 1 may free a page that is still in vcpu 2's 
> tlb.

So the requests bits we are interested in are exclusively set in this
function under requests_lock?

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-18  9:45       ` Avi Kivity
  2010-02-18  9:49         ` Jan Kiszka
@ 2010-02-18  9:50         ` Avi Kivity
  2010-02-18 10:05           ` Jan Kiszka
  1 sibling, 1 reply; 26+ messages in thread
From: Avi Kivity @ 2010-02-18  9:50 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Thomas Gleixner, KVM

On 02/18/2010 11:45 AM, Avi Kivity wrote:
> On 02/18/2010 11:40 AM, Jan Kiszka wrote:
>>
>>> Meanwhile, if anyone has any idea how to kill this lock, I'd love to 
>>> see it.
>>>
>> What concurrency does it resolve in the end? On first glance, it only
>> synchronize the fiddling with pre-VCPU request bits, right? What forces
>> us to do this? Wouldn't it suffice to disable preemption (thus
>> migration) and the let concurrent requests race for setting the bits? I
>> mean if some request bit was already set on entry, we don't include the
>>   related VCPU in smp_call_function_many anyway.
>
> It's more difficult.
>
> vcpu 0: sets request bit on vcpu 2
>           vcpu 1: test_and_set request bit on vcpu 2, returns already set
>           vcpu 1: returns
> vcpu 0: sends IPI
> vcpu 0: returns
>
> so vcpu 1 returns before the IPI was performed.  If the request was a 
> tlb flush, for example, vcpu 1 may free a page that is still in vcpu 
> 2's tlb.

One way out would be to have a KVM_REQ_IN_PROGRESS, set it in 
make_request, clear it in the IPI function.

If a second make_request sees it already set, it can simply busy wait 
until it is cleared, without sending the IPI.  Of course the busy wait 
means we can't enable preemption (or we may busy wait on an unscheduled 
task), but at least the requests can proceed in parallel instead of 
serializing.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-18  9:49         ` Jan Kiszka
@ 2010-02-18  9:53           ` Avi Kivity
  0 siblings, 0 replies; 26+ messages in thread
From: Avi Kivity @ 2010-02-18  9:53 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Thomas Gleixner, KVM

On 02/18/2010 11:49 AM, Jan Kiszka wrote:
> Avi Kivity wrote:
>    
>> On 02/18/2010 11:40 AM, Jan Kiszka wrote:
>>      
>>>> Meanwhile, if anyone has any idea how to kill this lock, I'd love to see it.
>>>>
>>>>
>>>>          
>>> What concurrency does it resolve in the end? On first glance, it only
>>> synchronize the fiddling with pre-VCPU request bits, right? What forces
>>> us to do this? Wouldn't it suffice to disable preemption (thus
>>> migration) and the let concurrent requests race for setting the bits? I
>>> mean if some request bit was already set on entry, we don't include the
>>>    related VCPU in smp_call_function_many anyway.
>>>
>>>        
>> It's more difficult.
>>
>> vcpu 0: sets request bit on vcpu 2
>>             vcpu 1: test_and_set request bit on vcpu 2, returns already set
>>             vcpu 1: returns
>> vcpu 0: sends IPI
>> vcpu 0: returns
>>
>> so vcpu 1 returns before the IPI was performed.  If the request was a
>> tlb flush, for example, vcpu 1 may free a page that is still in vcpu 2's
>> tlb.
>>      
> So the requests bits we are interested in are exclusively set in this
> function under requests_lock?
>    

Yes.  vcpu 1 may still see the bit set (if vcpu 2 didn't reach the start 
of its loop and cleared it), so it's not total serialization, but nearly so.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-18  9:50         ` Avi Kivity
@ 2010-02-18 10:05           ` Jan Kiszka
  2010-02-18 10:18             ` Avi Kivity
  0 siblings, 1 reply; 26+ messages in thread
From: Jan Kiszka @ 2010-02-18 10:05 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Thomas Gleixner, KVM

Avi Kivity wrote:
> On 02/18/2010 11:45 AM, Avi Kivity wrote:
>> On 02/18/2010 11:40 AM, Jan Kiszka wrote:
>>>> Meanwhile, if anyone has any idea how to kill this lock, I'd love to 
>>>> see it.
>>>>
>>> What concurrency does it resolve in the end? On first glance, it only
>>> synchronize the fiddling with pre-VCPU request bits, right? What forces
>>> us to do this? Wouldn't it suffice to disable preemption (thus
>>> migration) and the let concurrent requests race for setting the bits? I
>>> mean if some request bit was already set on entry, we don't include the
>>>   related VCPU in smp_call_function_many anyway.
>> It's more difficult.
>>
>> vcpu 0: sets request bit on vcpu 2
>>           vcpu 1: test_and_set request bit on vcpu 2, returns already set
>>           vcpu 1: returns
>> vcpu 0: sends IPI
>> vcpu 0: returns
>>
>> so vcpu 1 returns before the IPI was performed.  If the request was a 
>> tlb flush, for example, vcpu 1 may free a page that is still in vcpu 
>> 2's tlb.
> 
> One way out would be to have a KVM_REQ_IN_PROGRESS, set it in 
> make_request, clear it in the IPI function.
> 
> If a second make_request sees it already set, it can simply busy wait 
> until it is cleared, without sending the IPI.  Of course the busy wait 
> means we can't enable preemption (or we may busy wait on an unscheduled 
> task), but at least the requests can proceed in parallel instead of 
> serializing.

...or include VCPUs with KVM_REQ_IN_PROGRESS set into the IPI set even
if they already have the desired request bit set. Then we should
serialize in smp_call_function_many.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-18 10:05           ` Jan Kiszka
@ 2010-02-18 10:18             ` Avi Kivity
  2010-02-19  1:14               ` Marcelo Tosatti
  0 siblings, 1 reply; 26+ messages in thread
From: Avi Kivity @ 2010-02-18 10:18 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Thomas Gleixner, KVM

On 02/18/2010 12:05 PM, Jan Kiszka wrote:
> Avi Kivity wrote:
>    
>> On 02/18/2010 11:45 AM, Avi Kivity wrote:
>>      
>>> On 02/18/2010 11:40 AM, Jan Kiszka wrote:
>>>        
>>>>> Meanwhile, if anyone has any idea how to kill this lock, I'd love to
>>>>> see it.
>>>>>
>>>>>            
>>>> What concurrency does it resolve in the end? On first glance, it only
>>>> synchronize the fiddling with pre-VCPU request bits, right? What forces
>>>> us to do this? Wouldn't it suffice to disable preemption (thus
>>>> migration) and the let concurrent requests race for setting the bits? I
>>>> mean if some request bit was already set on entry, we don't include the
>>>>    related VCPU in smp_call_function_many anyway.
>>>>          
>>> It's more difficult.
>>>
>>> vcpu 0: sets request bit on vcpu 2
>>>            vcpu 1: test_and_set request bit on vcpu 2, returns already set
>>>            vcpu 1: returns
>>> vcpu 0: sends IPI
>>> vcpu 0: returns
>>>
>>> so vcpu 1 returns before the IPI was performed.  If the request was a
>>> tlb flush, for example, vcpu 1 may free a page that is still in vcpu
>>> 2's tlb.
>>>        
>> One way out would be to have a KVM_REQ_IN_PROGRESS, set it in
>> make_request, clear it in the IPI function.
>>
>> If a second make_request sees it already set, it can simply busy wait
>> until it is cleared, without sending the IPI.  Of course the busy wait
>> means we can't enable preemption (or we may busy wait on an unscheduled
>> task), but at least the requests can proceed in parallel instead of
>> serializing.
>>      
> ...or include VCPUs with KVM_REQ_IN_PROGRESS set into the IPI set even
> if they already have the desired request bit set.

But then we're making them take the IPI, which is pointless and 
expensive.  My approach piggy backs multiple requesters on one IPI.

> Then we should
> serialize in smp_call_function_many.
>    

Do you mean rely on s_c_f_m's internal synchronization?

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-18 10:18             ` Avi Kivity
@ 2010-02-19  1:14               ` Marcelo Tosatti
  0 siblings, 0 replies; 26+ messages in thread
From: Marcelo Tosatti @ 2010-02-19  1:14 UTC (permalink / raw)
  To: Avi Kivity; +Cc: Jan Kiszka, Thomas Gleixner, KVM

On Thu, Feb 18, 2010 at 12:18:25PM +0200, Avi Kivity wrote:
> On 02/18/2010 12:05 PM, Jan Kiszka wrote:
> >Avi Kivity wrote:
> >>On 02/18/2010 11:45 AM, Avi Kivity wrote:
> >>>On 02/18/2010 11:40 AM, Jan Kiszka wrote:
> >>>>>Meanwhile, if anyone has any idea how to kill this lock, I'd love to
> >>>>>see it.
> >>>>>
> >>>>What concurrency does it resolve in the end? On first glance, it only
> >>>>synchronize the fiddling with pre-VCPU request bits, right? What forces
> >>>>us to do this? Wouldn't it suffice to disable preemption (thus
> >>>>migration) and the let concurrent requests race for setting the bits? I
> >>>>mean if some request bit was already set on entry, we don't include the
> >>>>   related VCPU in smp_call_function_many anyway.
> >>>It's more difficult.
> >>>
> >>>vcpu 0: sets request bit on vcpu 2
> >>>           vcpu 1: test_and_set request bit on vcpu 2, returns already set
> >>>           vcpu 1: returns
> >>>vcpu 0: sends IPI
> >>>vcpu 0: returns
> >>>
> >>>so vcpu 1 returns before the IPI was performed.  If the request was a
> >>>tlb flush, for example, vcpu 1 may free a page that is still in vcpu
> >>>2's tlb.
> >>One way out would be to have a KVM_REQ_IN_PROGRESS, set it in
> >>make_request, clear it in the IPI function.
> >>
> >>If a second make_request sees it already set, it can simply busy wait
> >>until it is cleared, without sending the IPI.  Of course the busy wait
> >>means we can't enable preemption (or we may busy wait on an unscheduled
> >>task), but at least the requests can proceed in parallel instead of
> >>serializing.
>
> >...or include VCPUs with KVM_REQ_IN_PROGRESS set into the IPI set even
> >if they already have the desired request bit set.
> 
> But then we're making them take the IPI, which is pointless and
> expensive.  My approach piggy backs multiple requesters on one IPI.

I have played with this in the past (collapsing that would avoid two
simultaneous requestors from issuing two IPI's to a given vcpu, and
unification with KVM_REQ_KICK to avoid the IPI if vcpu not in guest
mode).

Its not worthwhile though, this is not a contention point with TDP
(maybe it becomes in the future with fine grained flushing, but not
ATM).

> >Then we should
> >serialize in smp_call_function_many.
> 
> Do you mean rely on s_c_f_m's internal synchronization?


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-17 14:00 [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks Thomas Gleixner
  2010-02-18  9:12 ` Jan Kiszka
  2010-02-18  9:19 ` Avi Kivity
@ 2010-02-23 19:18 ` Jan Kiszka
  2010-02-23 22:23   ` Thomas Gleixner
  2 siblings, 1 reply; 26+ messages in thread
From: Jan Kiszka @ 2010-02-23 19:18 UTC (permalink / raw)
  To: Thomas Gleixner, Avi Kivity
  Cc: KVM, Gleb Natapov, RT, Linux Kernel Mailing List

Thomas Gleixner wrote:
> The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
> them to raw_spinlock. No change for !RT kernels.

Doesn't fly for -rt anymore: pic_irq_update runs under this raw lock and
calls kvm_vcpu_kick which tries to wake_up some thread -> scheduling
while atomic.

This used to work up to 956f97cf. -rt for 2.6.31 is not yet affected,
but 2.6.33 should be broken (haven't checked, using kvm-kmod over 2.6.31
ATM). I can provide a patch that restores the deferred kicking if it's
acceptable for upstream.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks
  2010-02-23 19:18 ` Jan Kiszka
@ 2010-02-23 22:23   ` Thomas Gleixner
  2010-02-24  9:41     ` [PATCH] KVM: x86: Kick VCPU outside PIC lock again Jan Kiszka
  0 siblings, 1 reply; 26+ messages in thread
From: Thomas Gleixner @ 2010-02-23 22:23 UTC (permalink / raw)
  To: Jan Kiszka; +Cc: Avi Kivity, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

On Tue, 23 Feb 2010, Jan Kiszka wrote:

> Thomas Gleixner wrote:
> > The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
> > them to raw_spinlock. No change for !RT kernels.
> 
> Doesn't fly for -rt anymore: pic_irq_update runs under this raw lock and
> calls kvm_vcpu_kick which tries to wake_up some thread -> scheduling
> while atomic.

Hmm, a wakeup itself is fine. Is that code waking a wake queue?
 
> This used to work up to 956f97cf. -rt for 2.6.31 is not yet affected,
> but 2.6.33 should be broken (haven't checked, using kvm-kmod over 2.6.31
> ATM). I can provide a patch that restores the deferred kicking if it's
> acceptable for upstream.

Well, at least is would be nice to have one for -rt.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-23 22:23   ` Thomas Gleixner
@ 2010-02-24  9:41     ` Jan Kiszka
  2010-02-24  9:48       ` Avi Kivity
  0 siblings, 1 reply; 26+ messages in thread
From: Jan Kiszka @ 2010-02-24  9:41 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Avi Kivity, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

Thomas Gleixner wrote:
> On Tue, 23 Feb 2010, Jan Kiszka wrote:
> 
>> Thomas Gleixner wrote:
>>> The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
>>> them to raw_spinlock. No change for !RT kernels.
>> Doesn't fly for -rt anymore: pic_irq_update runs under this raw lock and
>> calls kvm_vcpu_kick which tries to wake_up some thread -> scheduling
>> while atomic.
> 
> Hmm, a wakeup itself is fine. Is that code waking a wake queue ?

Yes, it's a wake queue.

>  
>> This used to work up to 956f97cf. -rt for 2.6.31 is not yet affected,
>> but 2.6.33 should be broken (haven't checked, using kvm-kmod over 2.6.31
>> ATM). I can provide a patch that restores the deferred kicking if it's
>> acceptable for upstream.
> 
> Well, at least it would be nice to have one for -rt.
> 

Here we go. Haven't run kvm.git long enough over -rt yet to say that it
was the only remaining issue, but at least it doesn't complain instantly
anymore when starting a VM.

Jan

---------->

This restores the deferred VCPU kicking before 956f97cf. We need this
over -rt as wake_up* requires non-atomic context in this configuration.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---

 arch/x86/kvm/i8259.c |   53 ++++++++++++++++++++++++++++++++++++--------------
 arch/x86/kvm/irq.h   |    1 +
 2 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 07771da..ca426bd 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -32,6 +32,29 @@
 #include <linux/kvm_host.h>
 #include "trace.h"
 
+static void pic_lock(struct kvm_pic *s)
+	__acquires(&s->lock)
+{
+	raw_spin_lock(&s->lock);
+}
+
+static void pic_unlock(struct kvm_pic *s)
+	__releases(&s->lock)
+{
+	bool wakeup = s->wakeup_needed;
+	struct kvm_vcpu *vcpu;
+
+	s->wakeup_needed = false;
+
+	raw_spin_unlock(&s->lock);
+
+	if (wakeup) {
+		vcpu = s->kvm->bsp_vcpu;
+		if (vcpu)
+			kvm_vcpu_kick(vcpu);
+	}
+}
+
 static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 {
 	s->isr &= ~(1 << irq);
@@ -44,19 +67,19 @@ static void pic_clear_isr(struct kvm_kpic_state *s, int irq)
 	 * Other interrupt may be delivered to PIC while lock is dropped but
 	 * it should be safe since PIC state is already updated at this stage.
 	 */
-	raw_spin_unlock(&s->pics_state->lock);
+	pic_unlock(s->pics_state);
 	kvm_notify_acked_irq(s->pics_state->kvm, SELECT_PIC(irq), irq);
-	raw_spin_lock(&s->pics_state->lock);
+	pic_lock(s->pics_state);
 }
 
 void kvm_pic_clear_isr_ack(struct kvm *kvm)
 {
 	struct kvm_pic *s = pic_irqchip(kvm);
 
-	raw_spin_lock(&s->lock);
+	pic_lock(s);
 	s->pics[0].isr_ack = 0xff;
 	s->pics[1].isr_ack = 0xff;
-	raw_spin_unlock(&s->lock);
+	pic_unlock(s);
 }
 
 /*
@@ -157,9 +180,9 @@ static void pic_update_irq(struct kvm_pic *s)
 
 void kvm_pic_update_irq(struct kvm_pic *s)
 {
-	raw_spin_lock(&s->lock);
+	pic_lock(s);
 	pic_update_irq(s);
-	raw_spin_unlock(&s->lock);
+	pic_unlock(s);
 }
 
 int kvm_pic_set_irq(void *opaque, int irq, int level)
@@ -167,14 +190,14 @@ int kvm_pic_set_irq(void *opaque, int irq, int level)
 	struct kvm_pic *s = opaque;
 	int ret = -1;
 
-	raw_spin_lock(&s->lock);
+	pic_lock(s);
 	if (irq >= 0 && irq < PIC_NUM_PINS) {
 		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
 		pic_update_irq(s);
 		trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
 				      s->pics[irq >> 3].imr, ret == 0);
 	}
-	raw_spin_unlock(&s->lock);
+	pic_unlock(s);
 
 	return ret;
 }
@@ -204,7 +227,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 	int irq, irq2, intno;
 	struct kvm_pic *s = pic_irqchip(kvm);
 
-	raw_spin_lock(&s->lock);
+	pic_lock(s);
 	irq = pic_get_irq(&s->pics[0]);
 	if (irq >= 0) {
 		pic_intack(&s->pics[0], irq);
@@ -229,7 +252,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
 		intno = s->pics[0].irq_base + irq;
 	}
 	pic_update_irq(s);
-	raw_spin_unlock(&s->lock);
+	pic_unlock(s);
 
 	return intno;
 }
@@ -443,7 +466,7 @@ static int picdev_write(struct kvm_io_device *this,
 			printk(KERN_ERR "PIC: non byte write\n");
 		return 0;
 	}
-	raw_spin_lock(&s->lock);
+	pic_lock(s);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -456,7 +479,7 @@ static int picdev_write(struct kvm_io_device *this,
 		elcr_ioport_write(&s->pics[addr & 1], addr, data);
 		break;
 	}
-	raw_spin_unlock(&s->lock);
+	pic_unlock(s);
 	return 0;
 }
 
@@ -473,7 +496,7 @@ static int picdev_read(struct kvm_io_device *this,
 			printk(KERN_ERR "PIC: non byte read\n");
 		return 0;
 	}
-	raw_spin_lock(&s->lock);
+	pic_lock(s);
 	switch (addr) {
 	case 0x20:
 	case 0x21:
@@ -487,7 +510,7 @@ static int picdev_read(struct kvm_io_device *this,
 		break;
 	}
 	*(unsigned char *)val = data;
-	raw_spin_unlock(&s->lock);
+	pic_unlock(s);
 	return 0;
 }
 
@@ -504,7 +527,7 @@ static void pic_irq_request(void *opaque, int level)
 	s->output = level;
 	if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) {
 		s->pics[0].isr_ack &= ~(1 << irq);
-		kvm_vcpu_kick(vcpu);
+		s->wakeup_needed = true;
 	}
 }
 
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 34b1591..cd1f362 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -63,6 +63,7 @@ struct kvm_kpic_state {
 
 struct kvm_pic {
 	raw_spinlock_t lock;
+	bool wakeup_needed;
 	unsigned pending_acks;
 	struct kvm *kvm;
 	struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-24  9:41     ` [PATCH] KVM: x86: Kick VCPU outside PIC lock again Jan Kiszka
@ 2010-02-24  9:48       ` Avi Kivity
  2010-02-24  9:54         ` Jan Kiszka
  0 siblings, 1 reply; 26+ messages in thread
From: Avi Kivity @ 2010-02-24  9:48 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Thomas Gleixner, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

On 02/24/2010 11:41 AM, Jan Kiszka wrote:
> Thomas Gleixner wrote:
>    
>> On Tue, 23 Feb 2010, Jan Kiszka wrote:
>>
>>      
>>> Thomas Gleixner wrote:
>>>        
>>>> The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
>>>> them to raw_spinlock. No change for !RT kernels.
>>>>          
>>> Doesn't fly for -rt anymore: pic_irq_update runs under this raw lock and
>>> calls kvm_vcpu_kick which tries to wake_up some thread ->  scheduling
>>> while atomic.
>>>        
>> Hmm, a wakeup itself is fine. Is that code waking a wake queue ?
>>      
> Yes, it's a wake queue.
>    

So what's the core issue?  Is the lock_t in the wait_queue a sleeping mutex?

> This restores the deferred VCPU kicking before 956f97cf. We need this
> over -rt as wake_up* requires non-atomic context in this configuration.
>
>    

Seems sane, will apply once I understand why the current code fails.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-24  9:48       ` Avi Kivity
@ 2010-02-24  9:54         ` Jan Kiszka
  2010-02-24 10:04           ` Avi Kivity
  0 siblings, 1 reply; 26+ messages in thread
From: Jan Kiszka @ 2010-02-24  9:54 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Thomas Gleixner, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

Avi Kivity wrote:
> On 02/24/2010 11:41 AM, Jan Kiszka wrote:
>> Thomas Gleixner wrote:
>>    
>>> On Tue, 23 Feb 2010, Jan Kiszka wrote:
>>>
>>>      
>>>> Thomas Gleixner wrote:
>>>>        
>>>>> The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
>>>>> them to raw_spinlock. No change for !RT kernels.
>>>>>          
>>>> Doesn't fly for -rt anymore: pic_irq_update runs under this raw lock and
>>>> calls kvm_vcpu_kick which tries to wake_up some thread ->  scheduling
>>>> while atomic.
>>>>        
>>> Hmm, a wakeup itself is fine. Is that code waking a wake queue ?
>>>      
>> Yes, it's a wake queue.
>>    
> 
> So what's the core issue?  Is the lock_t in the wait_queue a sleeping mutex?

Yep.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-24  9:54         ` Jan Kiszka
@ 2010-02-24 10:04           ` Avi Kivity
  2010-02-24 10:13             ` Jan Kiszka
  0 siblings, 1 reply; 26+ messages in thread
From: Avi Kivity @ 2010-02-24 10:04 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Thomas Gleixner, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

On 02/24/2010 11:54 AM, Jan Kiszka wrote:
> Avi Kivity wrote:
>    
>> On 02/24/2010 11:41 AM, Jan Kiszka wrote:
>>      
>>> Thomas Gleixner wrote:
>>>
>>>        
>>>> On Tue, 23 Feb 2010, Jan Kiszka wrote:
>>>>
>>>>
>>>>          
>>>>> Thomas Gleixner wrote:
>>>>>
>>>>>            
>>>>>> The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
>>>>>> them to raw_spinlock. No change for !RT kernels.
>>>>>>
>>>>>>              
>>>>> Doesn't fly for -rt anymore: pic_irq_update runs under this raw lock and
>>>>> calls kvm_vcpu_kick which tries to wake_up some thread ->   scheduling
>>>>> while atomic.
>>>>>
>>>>>            
>>>> Hmm, a wakeup itself is fine. Is that code waking a wake queue ?
>>>>
>>>>          
>>> Yes, it's a wake queue.
>>>
>>>        
>> So what's the core issue?  Is the lock_t in the wait_queue a sleeping mutex?
>>      
> Yep.
>    

I see.  Won't we hit the same issue when we call pic functions from 
atomic context during the guest entry sequence?

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-24 10:04           ` Avi Kivity
@ 2010-02-24 10:13             ` Jan Kiszka
  2010-02-24 10:17               ` Avi Kivity
  0 siblings, 1 reply; 26+ messages in thread
From: Jan Kiszka @ 2010-02-24 10:13 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Thomas Gleixner, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

Avi Kivity wrote:
> On 02/24/2010 11:54 AM, Jan Kiszka wrote:
>> Avi Kivity wrote:
>>    
>>> On 02/24/2010 11:41 AM, Jan Kiszka wrote:
>>>      
>>>> Thomas Gleixner wrote:
>>>>
>>>>        
>>>>> On Tue, 23 Feb 2010, Jan Kiszka wrote:
>>>>>
>>>>>
>>>>>          
>>>>>> Thomas Gleixner wrote:
>>>>>>
>>>>>>            
>>>>>>> The i8254/i8259 locks need to be real spinlocks on preempt-rt. Convert
>>>>>>> them to raw_spinlock. No change for !RT kernels.
>>>>>>>
>>>>>>>              
>>>>>> Doesn't fly for -rt anymore: pic_irq_update runs under this raw lock and
>>>>>> calls kvm_vcpu_kick which tries to wake_up some thread ->   scheduling
>>>>>> while atomic.
>>>>>>
>>>>>>            
>>>>> Hmm, a wakeup itself is fine. Is that code waking a wake queue ?
>>>>>
>>>>>          
>>>> Yes, it's a wake queue.
>>>>
>>>>        
>>> So what's the core issue?  Is the lock_t in the wait_queue a sleeping mutex?
>>>      
>> Yep.
>>    
> 
> I see.  Won't we hit the same issue when we call pic functions from 
> atomic context during the guest entry sequence?
> 

If there are such call paths, for sure. What concrete path(s) do you
have in mind?

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-24 10:13             ` Jan Kiszka
@ 2010-02-24 10:17               ` Avi Kivity
  2010-02-24 10:22                 ` Jan Kiszka
  0 siblings, 1 reply; 26+ messages in thread
From: Avi Kivity @ 2010-02-24 10:17 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Thomas Gleixner, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

On 02/24/2010 12:13 PM, Jan Kiszka wrote:
>
>    
>> I see.  Won't we hit the same issue when we call pic functions from
>> atomic context during the guest entry sequence?
>>
>>      
> If there are such call paths, for sure. What concrete path(s) do you
> have in mind?
>
>    

vcpu_enter_guest() -> inject_pending_event() -> 
kvm_cpu_{has,get}_interrupt() -> various pic functions if you're unlucky.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-24 10:17               ` Avi Kivity
@ 2010-02-24 10:22                 ` Jan Kiszka
  2010-02-24 10:27                   ` Avi Kivity
  2010-02-24 10:28                   ` Jan Kiszka
  0 siblings, 2 replies; 26+ messages in thread
From: Jan Kiszka @ 2010-02-24 10:22 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Thomas Gleixner, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

Avi Kivity wrote:
> On 02/24/2010 12:13 PM, Jan Kiszka wrote:
>>    
>>> I see.  Won't we hit the same issue when we call pic functions from
>>> atomic context during the guest entry sequence?
>>>
>>>      
>> If there are such call paths, for sure. What concrete path(s) do you
>> have in mind?
>>
>>    
> 
> vcpu_enter_guest() -> inject_pending_event() -> 
> kvm_cpu_{has,get}_interrupt() -> various pic functions if you're unlucky.

But do they kick anyone or just check/pull information? Never saw any
warnings during my tests last year (granted: with older -rt and kvm
versions).

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-24 10:22                 ` Jan Kiszka
@ 2010-02-24 10:27                   ` Avi Kivity
  2010-02-24 10:31                     ` Jan Kiszka
  2010-02-24 10:28                   ` Jan Kiszka
  1 sibling, 1 reply; 26+ messages in thread
From: Avi Kivity @ 2010-02-24 10:27 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Thomas Gleixner, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

On 02/24/2010 12:22 PM, Jan Kiszka wrote:
> Avi Kivity wrote:
>    
>> On 02/24/2010 12:13 PM, Jan Kiszka wrote:
>>      
>>>
>>>        
>>>> I see.  Won't we hit the same issue when we call pic functions from
>>>> atomic context during the guest entry sequence?
>>>>
>>>>
>>>>          
>>> If there are such call paths, for sure. What concrete path(s) do you
>>> have in mind?
>>>
>>>
>>>        
>> vcpu_enter_guest() ->  inject_pending_event() ->
>> kvm_cpu_{has,get}_interrupt() ->  various pic functions if you're unlucky.
>>      
> But do they kick anyone or just check/pull information?

Probably not, kicking should be a side effect (or rather the main 
effect) of queueing an interrupt, not dequeuing it.

> Never saw any
> warnings during my tests last year (granted: with older -rt and kvm
> versions).
>    

Well, most guests kill the pic early on.  Will apply the patch.

-- 
error compiling committee.c: too many arguments to function

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-24 10:22                 ` Jan Kiszka
  2010-02-24 10:27                   ` Avi Kivity
@ 2010-02-24 10:28                   ` Jan Kiszka
  2010-02-24 10:41                     ` Avi Kivity
  1 sibling, 1 reply; 26+ messages in thread
From: Jan Kiszka @ 2010-02-24 10:28 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Thomas Gleixner, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

Jan Kiszka wrote:
> Avi Kivity wrote:
>> On 02/24/2010 12:13 PM, Jan Kiszka wrote:
>>>    
>>>> I see.  Won't we hit the same issue when we call pic functions from
>>>> atomic context during the guest entry sequence?
>>>>
>>>>      
>>> If there are such call paths, for sure. What concrete path(s) do you
>>> have in mind?
>>>
>>>    
>> vcpu_enter_guest() -> inject_pending_event() -> 
>> kvm_cpu_{has,get}_interrupt() -> various pic functions if you're unlucky.
> 
> But do they kick anyone or just check/pull information? Never saw any
> warnings during my tests last year (granted: with older -rt and kvm
> versions).

Mmh, they could if there is > 1 IRQ pending. Guess this just never
triggered in real life due to typical APIC use and low IRQ load during
PIC times in my tests.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-24 10:27                   ` Avi Kivity
@ 2010-02-24 10:31                     ` Jan Kiszka
  0 siblings, 0 replies; 26+ messages in thread
From: Jan Kiszka @ 2010-02-24 10:31 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Thomas Gleixner, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

Avi Kivity wrote:
> On 02/24/2010 12:22 PM, Jan Kiszka wrote:
>> Avi Kivity wrote:
>>    
>>> On 02/24/2010 12:13 PM, Jan Kiszka wrote:
>>>      
>>>>        
>>>>> I see.  Won't we hit the same issue when we call pic functions from
>>>>> atomic context during the guest entry sequence?
>>>>>
>>>>>
>>>>>          
>>>> If there are such call paths, for sure. What concrete path(s) do you
>>>> have in mind?
>>>>
>>>>
>>>>        
>>> vcpu_enter_guest() ->  inject_pending_event() ->
>>> kvm_cpu_{has,get}_interrupt() ->  various pic functions if you're unlucky.
>>>      
>> But do they kick anyone or just check/pull information?
> 
> Probably not, kicking should be a side effect (or rather the main 
> effect) of queueing an interrupt, not dequeuing it.
> 
>> Never saw any
>> warnings during my tests last year (granted: with older -rt and kvm
>> versions).
>>    
> 
> Well, most guests kill the pic early on.  Will apply the patch.
> 

I think it needs some extension: pic_irq_request should only schedule a
wake up on a rising edge of the PIC output.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-24 10:28                   ` Jan Kiszka
@ 2010-02-24 10:41                     ` Avi Kivity
  2010-02-24 11:42                       ` Jan Kiszka
  0 siblings, 1 reply; 26+ messages in thread
From: Avi Kivity @ 2010-02-24 10:41 UTC (permalink / raw)
  To: Jan Kiszka
  Cc: Thomas Gleixner, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

On 02/24/2010 12:28 PM, Jan Kiszka wrote:
> Jan Kiszka wrote:
>    
>> Avi Kivity wrote:
>>      
>>> On 02/24/2010 12:13 PM, Jan Kiszka wrote:
>>>        
>>>>
>>>>          
>>>>> I see.  Won't we hit the same issue when we call pic functions from
>>>>> atomic context during the guest entry sequence?
>>>>>
>>>>>
>>>>>            
>>>> If there are such call paths, for sure. What concrete path(s) do you
>>>> have in mind?
>>>>
>>>>
>>>>          
>>> vcpu_enter_guest() ->  inject_pending_event() ->
>>> kvm_cpu_{has,get}_interrupt() ->  various pic functions if you're unlucky.
>>>        
>> But do they kick anyone or just check/pull information? Never saw any
>> warnings during my tests last year (granted: with older -rt and kvm
>> versions).
>>      
> Mmh, they could if there is > 1 IRQ pending. Guess this just never
> triggered in real life due to typical APIC use and low IRQ load during
> PIC times in my tests.
>    

We could just ignore the wakeup in this path.  It's called in vcpu 
context, so obviously the vcpu is awake and kicking it will only hurt 
your feet.

Longer term, we should clear up this mess.  One possible path is to move 
the pic/lapic/injection stuff out of the the critical section, and use 
vcpu->requests to close the race between querying the pic/lapic and 
entering the guest.

-- 
error compiling committee.c: too many arguments to function


^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] KVM: x86: Kick VCPU outside PIC lock again
  2010-02-24 10:41                     ` Avi Kivity
@ 2010-02-24 11:42                       ` Jan Kiszka
  0 siblings, 0 replies; 26+ messages in thread
From: Jan Kiszka @ 2010-02-24 11:42 UTC (permalink / raw)
  To: Avi Kivity
  Cc: Thomas Gleixner, KVM, Gleb Natapov, RT, Linux Kernel Mailing List

Avi Kivity wrote:
> On 02/24/2010 12:28 PM, Jan Kiszka wrote:
>> Jan Kiszka wrote:
>>    
>>> Avi Kivity wrote:
>>>      
>>>> On 02/24/2010 12:13 PM, Jan Kiszka wrote:
>>>>        
>>>>>          
>>>>>> I see.  Won't we hit the same issue when we call pic functions from
>>>>>> atomic context during the guest entry sequence?
>>>>>>
>>>>>>
>>>>>>            
>>>>> If there are such call paths, for sure. What concrete path(s) do you
>>>>> have in mind?
>>>>>
>>>>>
>>>>>          
>>>> vcpu_enter_guest() ->  inject_pending_event() ->
>>>> kvm_cpu_{has,get}_interrupt() ->  various pic functions if you're unlucky.
>>>>        
>>> But do they kick anyone or just check/pull information? Never saw any
>>> warnings during my tests last year (granted: with older -rt and kvm
>>> versions).
>>>      
>> Mmh, they could if there is > 1 IRQ pending. Guess this just never
>> triggered in real life due to typical APIC use and low IRQ load during
>> PIC times in my tests.
>>    
> 
> We could just ignore the wakeup in this path.  It's called in vcpu 
> context, so obviously the vcpu is awake and kicking it will only hurt 
> your feet.

Looking at kvm_vcpu_kick, this already happens: The wake queue is
checked for pending waiters (i.e. none if waking ourselves), and no IPI is
sent if we run on the same CPU as the VCPU. That explains why
this path is practically safe.

> 
> Longer term, we should clear up this mess.  One possible path is to move 
> the pic/lapic/injection stuff out of the the critical section, and use 
> vcpu->requests to close the race between querying the pic/lapic and 
> entering the guest.
> 

Sounds worthwhile as well.

Jan

-- 
Siemens AG, Corporate Technology, CT T DE IT 1
Corporate Competence Center Embedded Linux

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2010-02-24 11:42 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-02-17 14:00 [patch] x86: kvm: Convert i8254/i8259 locks to raw_spinlocks Thomas Gleixner
2010-02-18  9:12 ` Jan Kiszka
2010-02-18  9:20   ` Avi Kivity
2010-02-18  9:40     ` Jan Kiszka
2010-02-18  9:45       ` Avi Kivity
2010-02-18  9:49         ` Jan Kiszka
2010-02-18  9:53           ` Avi Kivity
2010-02-18  9:50         ` Avi Kivity
2010-02-18 10:05           ` Jan Kiszka
2010-02-18 10:18             ` Avi Kivity
2010-02-19  1:14               ` Marcelo Tosatti
2010-02-18  9:19 ` Avi Kivity
2010-02-23 19:18 ` Jan Kiszka
2010-02-23 22:23   ` Thomas Gleixner
2010-02-24  9:41     ` [PATCH] KVM: x86: Kick VCPU outside PIC lock again Jan Kiszka
2010-02-24  9:48       ` Avi Kivity
2010-02-24  9:54         ` Jan Kiszka
2010-02-24 10:04           ` Avi Kivity
2010-02-24 10:13             ` Jan Kiszka
2010-02-24 10:17               ` Avi Kivity
2010-02-24 10:22                 ` Jan Kiszka
2010-02-24 10:27                   ` Avi Kivity
2010-02-24 10:31                     ` Jan Kiszka
2010-02-24 10:28                   ` Jan Kiszka
2010-02-24 10:41                     ` Avi Kivity
2010-02-24 11:42                       ` Jan Kiszka

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox