All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] make HVM PIC emulation code SMP-safe
@ 2006-05-15 19:11 David Lively
  2006-05-16  0:29 ` Dave Lively
  0 siblings, 1 reply; 10+ messages in thread
From: David Lively @ 2006-05-15 19:11 UTC (permalink / raw)
  To: xen-devel

[-- Attachment #1: Type: text/plain, Size: 1399 bytes --]

The current PIC emulation code for HVM (fully-virtualized) guests
is unsafe on SMP hosts.  Guests can access the PIC from any VCPU,
though they should be (and generally are) controlling access so that
at most a single VCPU is accessing the PIC at any time.  However,
there are two other paths that may access the PIC concurrently with
a guest VCPU:
(1) "the APIC kludge" - cpu_get_interrupt() calls pic_update_irq() to
      bump slave PIC intrs to the master PIC.  This is called from all 
VCPUS.
(2) hvm_pic_assist() calls do_pic_irqs[_clear]() - from VCPU 0 only

With no PIC concurrency controls, SMP HVM guests[1] are unstable (tend
to hang) under high I/O loads (e.g., the LTP aio-stress & dio tests).  
With the
PIC concurrency control introduced in the attached patch, SMP HVM guests
are considerably more stable.  I have yet to see a hang under heavy I/O 
loads.
I've tested only 64-bit SMP guests (but this code is unchanged for 32 
bits), always
passing "noapic" as explained below.

Dave

[1] Note we're passing "noapic" to the Linux kernels we're testing on SMP
guests.  This tells Linux to ignore the I/O APIC.  Without this, we lose too
many hda interrupts for Linux to boot.  The I/O APIC code has many of the
same issues as the PIC code.  I have a similar fix for the I/O APICs, 
but since
it doesn't fix the "lost interrupt" problem I haven't been able to 
adequately test
it yet.

[-- Attachment #2: vpic-smp-safety.patch --]
[-- Type: text/x-patch, Size: 11333 bytes --]

diff -r d056f91cfd95 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Sun May 14 20:13:14 2006 +0100
+++ b/xen/arch/x86/hvm/hvm.c	Mon May 15 13:42:12 2006 -0400
@@ -240,11 +240,14 @@ int cpu_get_interrupt(struct vcpu *v, in
 {
     int intno;
     struct hvm_virpic *s = &v->domain->arch.hvm_domain.vpic;
+    unsigned long flags;
 
     if ( (intno = cpu_get_apic_interrupt(v, type)) != -1 ) {
         /* set irq request if a PIC irq is still pending */
         /* XXX: improve that */
+        spin_lock_irqsave(&s->lock, flags);
         pic_update_irq(s);
+        spin_unlock_irqrestore(&s->lock, flags);
         return intno;
     }
     /* read the irq from the PIC */
diff -r d056f91cfd95 xen/arch/x86/hvm/i8259.c
--- a/xen/arch/x86/hvm/i8259.c	Sun May 14 20:13:14 2006 +0100
+++ b/xen/arch/x86/hvm/i8259.c	Mon May 15 13:42:12 2006 -0400
@@ -35,9 +35,13 @@
 #include <asm/current.h>
 
 /* set irq level. If an edge is detected, then the IRR is set to 1 */
+/* Caller must hold vpic lock */
 static inline void pic_set_irq1(PicState *s, int irq, int level)
 {
     int mask;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     mask = 1 << irq;
     if (s->elcr & mask) {
         /* level triggered */
@@ -63,9 +67,13 @@ static inline void pic_set_irq1(PicState
 
 /* return the highest priority found in mask (highest = smallest
    number). Return 8 if no irq */
+/* Caller must hold vpic lock */
 static inline int get_priority(PicState *s, int mask)
 {
     int priority;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     if (mask == 0)
         return 8;
     priority = 0;
@@ -75,9 +83,12 @@ static inline int get_priority(PicState 
 }
 
 /* return the pic wanted interrupt. return -1 if none */
+/* Caller must hold vpic lock */
 static int pic_get_irq(PicState *s)
 {
     int mask, cur_priority, priority;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     mask = s->irr & ~s->imr;
     priority = get_priority(s, mask);
@@ -101,9 +112,12 @@ static int pic_get_irq(PicState *s)
 /* raise irq to CPU if necessary. must be called every time the active
    irq may change */
 /* XXX: should not export it, but it is needed for an APIC kludge */
+/* Caller must hold vpic lock */
 void pic_update_irq(struct hvm_virpic *s)
 {
     int irq2, irq;
+
+    BUG_ON(!spin_is_locked(&s->lock));
 
     /* first look at slave pic */
     irq2 = pic_get_irq(&s->pics[1]);
@@ -122,29 +136,40 @@ void pic_set_irq_new(void *opaque, int i
 void pic_set_irq_new(void *opaque, int irq, int level)
 {
     struct hvm_virpic *s = opaque;
-
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     hvm_vioapic_set_irq(current->domain, irq, level);
     pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
     /* used for IOAPIC irqs */
     if (s->alt_irq_func)
         s->alt_irq_func(s->alt_irq_opaque, irq, level);
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 void do_pic_irqs (struct hvm_virpic *s, uint16_t irqs)
 {
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     s->pics[1].irr |= (uint8_t)(irqs >> 8);
     s->pics[0].irr |= (uint8_t) irqs;
     hvm_vioapic_do_irqs(current->domain, irqs);
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 void do_pic_irqs_clear (struct hvm_virpic *s, uint16_t irqs)
 {
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     s->pics[1].irr &= ~(uint8_t)(irqs >> 8);
     s->pics[0].irr &= ~(uint8_t) irqs;
     hvm_vioapic_do_irqs_clear(current->domain, irqs);
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 /* obsolete function */
@@ -154,8 +179,11 @@ void pic_set_irq(struct hvm_virpic *isa_
 }
 
 /* acknowledge interrupt 'irq' */
+/* Caller must hold vpic lock */
 static inline void pic_intack(PicState *s, int irq)
 {
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     if (s->auto_eoi) {
         if (s->rotate_on_auto_eoi)
             s->priority_add = (irq + 1) & 7;
@@ -170,7 +198,9 @@ int pic_read_irq(struct hvm_virpic *s)
 int pic_read_irq(struct hvm_virpic *s)
 {
     int irq, irq2, intno;
-
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     irq = pic_get_irq(&s->pics[0]);
     if (irq >= 0) {
         pic_intack(&s->pics[0], irq);
@@ -194,13 +224,17 @@ int pic_read_irq(struct hvm_virpic *s)
         intno = s->pics[0].irq_base + irq;
     }
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
         
     return intno;
 }
 
+/* Caller must hold vpic lock */
 static void update_shared_irr(struct hvm_virpic *s, PicState *c)
 {
     uint8_t *pl, *pe;
+
+    BUG_ON(!spin_is_locked(&s->lock));
 
     get_sp(current->domain)->sp_global.pic_elcr = 
 		s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
@@ -216,9 +250,12 @@ static void update_shared_irr(struct hvm
     }
 }
 
+/* Caller must hold vpic lock */
 static void pic_reset(void *opaque)
 {
     PicState *s = opaque;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     s->last_irr = 0;
     s->irr = 0;
@@ -237,10 +274,13 @@ static void pic_reset(void *opaque)
     s->elcr = 0;
 }
 
+/* Caller must hold vpic lock */
 static void pic_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     PicState *s = opaque;
     int priority, cmd, irq;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     addr &= 1;
     if (addr == 0) {
@@ -328,9 +368,12 @@ static void pic_ioport_write(void *opaqu
     }
 }
 
+/* Caller must hold vpic lock */
 static uint32_t pic_poll_read (PicState *s, uint32_t addr1)
 {
     int ret;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     ret = pic_get_irq(s);
     if (ret >= 0) {
@@ -350,11 +393,14 @@ static uint32_t pic_poll_read (PicState 
     return ret;
 }
 
+/* Caller must hold vpic lock */
 static uint32_t pic_ioport_read(void *opaque, uint32_t addr1)
 {
     PicState *s = opaque;
     unsigned int addr;
     int ret;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     addr = addr1;
     addr &= 1;
@@ -375,23 +421,30 @@ static uint32_t pic_ioport_read(void *op
 }
 
 /* memory mapped interrupt status */
-/* XXX: may be the same than pic_read_irq() */
+/* XXX: may be the same than pic_read_rq() */
 uint32_t pic_intack_read(struct hvm_virpic *s)
 {
     int ret;
-
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     ret = pic_poll_read(&s->pics[0], 0x00);
     if (ret == 2)
         ret = pic_poll_read(&s->pics[1], 0x80) + 8;
     /* Prepare for ISR read */
     s->pics[0].read_reg_select = 1;
+    spin_unlock_irqrestore(&s->lock, flags);
     
     return ret;
 }
 
 static void elcr_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+/* Caller must hold vpic lock */
 {
     PicState *s = opaque;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     s->elcr = val & s->elcr_mask;
 }
 
@@ -402,23 +455,31 @@ static uint32_t elcr_ioport_read(void *o
 }
 
 /* XXX: add generic master/slave system */
+/* Caller must hold vpic lock */
 static void pic_init1(int io_addr, int elcr_addr, PicState *s)
 {
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     pic_reset(s);
 }
 
 void pic_init(struct hvm_virpic *s, void (*irq_request)(void *, int),
               void *irq_request_opaque)
 {
+    unsigned long flags;
+
     memset(s, 0, sizeof(*s));
+    spin_lock_init(&s->lock);
+    s->pics[0].pics_state = s;
+    s->pics[1].pics_state = s;
+    spin_lock_irqsave(&s->lock, flags);
     pic_init1(0x20, 0x4d0, &s->pics[0]);
     pic_init1(0xa0, 0x4d1, &s->pics[1]);
+    spin_unlock_irqrestore(&s->lock, flags);
     s->pics[0].elcr_mask = 0xf8;
     s->pics[1].elcr_mask = 0xde;
     s->irq_request = irq_request;
     s->irq_request_opaque = irq_request_opaque;
-    s->pics[0].pics_state = s;
-    s->pics[1].pics_state = s;
     return; 
 }
 
@@ -426,8 +487,12 @@ void pic_set_alt_irq_func(struct hvm_vir
                           void (*alt_irq_func)(void *, int, int),
                           void *alt_irq_opaque)
 {
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     s->alt_irq_func = alt_irq_func;
     s->alt_irq_opaque = alt_irq_opaque;
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 static int intercept_pic_io(ioreq_t *p)
@@ -435,6 +500,7 @@ static int intercept_pic_io(ioreq_t *p)
     struct hvm_virpic  *pic;
     struct vcpu *v = current;
     uint32_t data;
+    unsigned long flags;
     
     if ( p->size != 1 || p->count != 1) {
         printk("PIC_IO wrong access size %d!\n", (int)p->size);
@@ -446,12 +512,16 @@ static int intercept_pic_io(ioreq_t *p)
             hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_IN);
         else
             data = p->u.data;
+	spin_lock_irqsave(&pic->lock, flags);
         pic_ioport_write((void*)&pic->pics[p->addr>>7],
                 (uint32_t) p->addr, (uint32_t) (data & 0xff));
+	spin_unlock_irqrestore(&pic->lock, flags);
     }
     else {
+	spin_lock_irqsave(&pic->lock, flags);
         data = pic_ioport_read(
             (void*)&pic->pics[p->addr>>7], (uint32_t) p->addr);
+	spin_unlock_irqrestore(&pic->lock, flags);
         if(p->pdata_valid) 
             hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_OUT);
         else 
@@ -465,6 +535,7 @@ static int intercept_elcr_io(ioreq_t *p)
     struct hvm_virpic  *s;
     struct vcpu *v = current;
     uint32_t data;
+    unsigned long flags;
     
     if ( p->size != 1 || p->count != 1 ) {
         printk("PIC_IO wrong access size %d!\n", (int)p->size);
@@ -477,10 +548,12 @@ static int intercept_elcr_io(ioreq_t *p)
             hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_IN);
         else
             data = p->u.data;
+	spin_lock_irqsave(&s->lock, flags);
         elcr_ioport_write((void*)&s->pics[p->addr&1],
                 (uint32_t) p->addr, (uint32_t)( data & 0xff));
     	get_sp(current->domain)->sp_global.pic_elcr = 
             s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
+	spin_unlock_irqrestore(&s->lock, flags);
     }
     else {
         data = (u64) elcr_ioport_read(
@@ -512,10 +585,9 @@ int cpu_get_pic_interrupt(struct vcpu *v
     if ( !vlapic_accept_pic_intr(v) )
         return -1;
 
-    if ( !plat->interrupt_request )
-        return -1;
-
-    plat->interrupt_request = 0;
+    if (cmpxchg(&plat->interrupt_request, 1, 0) != 1)
+	return -1;
+
     /* read the irq from the PIC */
     intno = pic_read_irq(s);
     *type = VLAPIC_DELIV_MODE_EXT;
diff -r d056f91cfd95 xen/include/asm-x86/hvm/vpic.h
--- a/xen/include/asm-x86/hvm/vpic.h	Sun May 14 20:13:14 2006 +0100
+++ b/xen/include/asm-x86/hvm/vpic.h	Mon May 15 13:42:12 2006 -0400
@@ -60,6 +60,7 @@ struct hvm_virpic {
     /* IOAPIC callback support */
     void (*alt_irq_func)(void *opaque, int irq_num, int level);
     void *alt_irq_opaque;
+    spinlock_t lock;
 };
 
 
@@ -72,7 +73,7 @@ void pic_set_alt_irq_func(struct hvm_vir
                           void (*alt_irq_func)(void *, int, int),
                           void *alt_irq_opaque);
 int pic_read_irq(struct hvm_virpic *s);
-void pic_update_irq(struct hvm_virpic *s);
+void pic_update_irq(struct hvm_virpic *s);	/* Caller must hold s->lock */
 uint32_t pic_intack_read(struct hvm_virpic *s);
 void register_pic_io_hook (void);
 int cpu_get_pic_interrupt(struct vcpu *v, int *type);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] make HVM PIC emulation code SMP-safe
  2006-05-15 19:11 [PATCH] make HVM PIC emulation code SMP-safe David Lively
@ 2006-05-16  0:29 ` Dave Lively
  0 siblings, 0 replies; 10+ messages in thread
From: Dave Lively @ 2006-05-16  0:29 UTC (permalink / raw)
  To: xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 1762 bytes --]

Resubmit of same with signed-off-by tag ...

On 5/15/06, David Lively <dlively@virtualiron.com> wrote:
>
> The current PIC emulation code for HVM (fully-virtualized) guests
> is unsafe on SMP hosts.  Guests can access the PIC from any VCPU,
> though they should be (and generally are) controlling access so that
> at most a single VCPU is accessing the PIC at any time.  However,
> there are two other paths that may access the PIC concurrently with
> a guest VCPU:
> (1) "the APIC kludge" - cpu_get_interrupt() calls pic_update_irq() to
>       bump slave PIC intrs to the master PIC.  This is called from all
> VCPUS.
> (2) hvm_pic_assist() calls do_pic_irqs[_clear]() - from VCPU 0 only
>
> With no PIC concurrency controls, SMP HVM guests[1] are unstable (tend
> to hang) under high I/O loads (e.g., the LTP aio-stress & dio tests).
> With the
> PIC concurrency control introduced in the attached patch, SMP HVM guests
> are considerably more stable.  I have yet to see a hang under heavy I/O
> loads.
> I've tested only 64-bit SMP guests (but this code is unchanged for 32
> bits), always
> passing "noapic" as explained below.
>
> Dave
>
> [1] Note we're passing "noapic" to the Linux kernels we're testing on SMP
> guests.  This tells Linux to ignore the I/O APIC.  Without this, we lose
> too
> many hda interrupts for Linux to boot.  The I/O APIC code has many of the
> same issues as the PIC code.  I have a similar fix for the I/O APICs,
> but since
> it doesn't fix the "lost interrupt" problem I haven't been able to
> adequately test
> it yet.
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
>
>
>
>

[-- Attachment #1.2: Type: text/html, Size: 2529 bytes --]

[-- Attachment #2: vpic-smp-safety.patch --]
[-- Type: text/x-patch, Size: 11391 bytes --]

# Signed-off-by: David Lively <dlively@virtualiron.com>

diff -r d056f91cfd95 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Sun May 14 20:13:14 2006 +0100
+++ b/xen/arch/x86/hvm/hvm.c	Mon May 15 13:42:12 2006 -0400
@@ -240,11 +240,14 @@ int cpu_get_interrupt(struct vcpu *v, in
 {
     int intno;
     struct hvm_virpic *s = &v->domain->arch.hvm_domain.vpic;
+    unsigned long flags;
 
     if ( (intno = cpu_get_apic_interrupt(v, type)) != -1 ) {
         /* set irq request if a PIC irq is still pending */
         /* XXX: improve that */
+        spin_lock_irqsave(&s->lock, flags);
         pic_update_irq(s);
+        spin_unlock_irqrestore(&s->lock, flags);
         return intno;
     }
     /* read the irq from the PIC */
diff -r d056f91cfd95 xen/arch/x86/hvm/i8259.c
--- a/xen/arch/x86/hvm/i8259.c	Sun May 14 20:13:14 2006 +0100
+++ b/xen/arch/x86/hvm/i8259.c	Mon May 15 13:42:12 2006 -0400
@@ -35,9 +35,13 @@
 #include <asm/current.h>
 
 /* set irq level. If an edge is detected, then the IRR is set to 1 */
+/* Caller must hold vpic lock */
 static inline void pic_set_irq1(PicState *s, int irq, int level)
 {
     int mask;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     mask = 1 << irq;
     if (s->elcr & mask) {
         /* level triggered */
@@ -63,9 +67,13 @@ static inline void pic_set_irq1(PicState
 
 /* return the highest priority found in mask (highest = smallest
    number). Return 8 if no irq */
+/* Caller must hold vpic lock */
 static inline int get_priority(PicState *s, int mask)
 {
     int priority;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     if (mask == 0)
         return 8;
     priority = 0;
@@ -75,9 +83,12 @@ static inline int get_priority(PicState 
 }
 
 /* return the pic wanted interrupt. return -1 if none */
+/* Caller must hold vpic lock */
 static int pic_get_irq(PicState *s)
 {
     int mask, cur_priority, priority;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     mask = s->irr & ~s->imr;
     priority = get_priority(s, mask);
@@ -101,9 +112,12 @@ static int pic_get_irq(PicState *s)
 /* raise irq to CPU if necessary. must be called every time the active
    irq may change */
 /* XXX: should not export it, but it is needed for an APIC kludge */
+/* Caller must hold vpic lock */
 void pic_update_irq(struct hvm_virpic *s)
 {
     int irq2, irq;
+
+    BUG_ON(!spin_is_locked(&s->lock));
 
     /* first look at slave pic */
     irq2 = pic_get_irq(&s->pics[1]);
@@ -122,29 +136,40 @@ void pic_set_irq_new(void *opaque, int i
 void pic_set_irq_new(void *opaque, int irq, int level)
 {
     struct hvm_virpic *s = opaque;
-
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     hvm_vioapic_set_irq(current->domain, irq, level);
     pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
     /* used for IOAPIC irqs */
     if (s->alt_irq_func)
         s->alt_irq_func(s->alt_irq_opaque, irq, level);
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 void do_pic_irqs (struct hvm_virpic *s, uint16_t irqs)
 {
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     s->pics[1].irr |= (uint8_t)(irqs >> 8);
     s->pics[0].irr |= (uint8_t) irqs;
     hvm_vioapic_do_irqs(current->domain, irqs);
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 void do_pic_irqs_clear (struct hvm_virpic *s, uint16_t irqs)
 {
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     s->pics[1].irr &= ~(uint8_t)(irqs >> 8);
     s->pics[0].irr &= ~(uint8_t) irqs;
     hvm_vioapic_do_irqs_clear(current->domain, irqs);
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 /* obsolete function */
@@ -154,8 +179,11 @@ void pic_set_irq(struct hvm_virpic *isa_
 }
 
 /* acknowledge interrupt 'irq' */
+/* Caller must hold vpic lock */
 static inline void pic_intack(PicState *s, int irq)
 {
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     if (s->auto_eoi) {
         if (s->rotate_on_auto_eoi)
             s->priority_add = (irq + 1) & 7;
@@ -170,7 +198,9 @@ int pic_read_irq(struct hvm_virpic *s)
 int pic_read_irq(struct hvm_virpic *s)
 {
     int irq, irq2, intno;
-
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     irq = pic_get_irq(&s->pics[0]);
     if (irq >= 0) {
         pic_intack(&s->pics[0], irq);
@@ -194,13 +224,17 @@ int pic_read_irq(struct hvm_virpic *s)
         intno = s->pics[0].irq_base + irq;
     }
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
         
     return intno;
 }
 
+/* Caller must hold vpic lock */
 static void update_shared_irr(struct hvm_virpic *s, PicState *c)
 {
     uint8_t *pl, *pe;
+
+    BUG_ON(!spin_is_locked(&s->lock));
 
     get_sp(current->domain)->sp_global.pic_elcr = 
 		s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
@@ -216,9 +250,12 @@ static void update_shared_irr(struct hvm
     }
 }
 
+/* Caller must hold vpic lock */
 static void pic_reset(void *opaque)
 {
     PicState *s = opaque;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     s->last_irr = 0;
     s->irr = 0;
@@ -237,10 +274,13 @@ static void pic_reset(void *opaque)
     s->elcr = 0;
 }
 
+/* Caller must hold vpic lock */
 static void pic_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     PicState *s = opaque;
     int priority, cmd, irq;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     addr &= 1;
     if (addr == 0) {
@@ -328,9 +368,12 @@ static void pic_ioport_write(void *opaqu
     }
 }
 
+/* Caller must hold vpic lock */
 static uint32_t pic_poll_read (PicState *s, uint32_t addr1)
 {
     int ret;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     ret = pic_get_irq(s);
     if (ret >= 0) {
@@ -350,11 +393,14 @@ static uint32_t pic_poll_read (PicState 
     return ret;
 }
 
+/* Caller must hold vpic lock */
 static uint32_t pic_ioport_read(void *opaque, uint32_t addr1)
 {
     PicState *s = opaque;
     unsigned int addr;
     int ret;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     addr = addr1;
     addr &= 1;
@@ -375,23 +421,30 @@ static uint32_t pic_ioport_read(void *op
 }
 
 /* memory mapped interrupt status */
-/* XXX: may be the same than pic_read_irq() */
+/* XXX: may be the same than pic_read_rq() */
 uint32_t pic_intack_read(struct hvm_virpic *s)
 {
     int ret;
-
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     ret = pic_poll_read(&s->pics[0], 0x00);
     if (ret == 2)
         ret = pic_poll_read(&s->pics[1], 0x80) + 8;
     /* Prepare for ISR read */
     s->pics[0].read_reg_select = 1;
+    spin_unlock_irqrestore(&s->lock, flags);
     
     return ret;
 }
 
 static void elcr_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+/* Caller must hold vpic lock */
 {
     PicState *s = opaque;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     s->elcr = val & s->elcr_mask;
 }
 
@@ -402,23 +455,31 @@ static uint32_t elcr_ioport_read(void *o
 }
 
 /* XXX: add generic master/slave system */
+/* Caller must hold vpic lock */
 static void pic_init1(int io_addr, int elcr_addr, PicState *s)
 {
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     pic_reset(s);
 }
 
 void pic_init(struct hvm_virpic *s, void (*irq_request)(void *, int),
               void *irq_request_opaque)
 {
+    unsigned long flags;
+
     memset(s, 0, sizeof(*s));
+    spin_lock_init(&s->lock);
+    s->pics[0].pics_state = s;
+    s->pics[1].pics_state = s;
+    spin_lock_irqsave(&s->lock, flags);
     pic_init1(0x20, 0x4d0, &s->pics[0]);
     pic_init1(0xa0, 0x4d1, &s->pics[1]);
+    spin_unlock_irqrestore(&s->lock, flags);
     s->pics[0].elcr_mask = 0xf8;
     s->pics[1].elcr_mask = 0xde;
     s->irq_request = irq_request;
     s->irq_request_opaque = irq_request_opaque;
-    s->pics[0].pics_state = s;
-    s->pics[1].pics_state = s;
     return; 
 }
 
@@ -426,8 +487,12 @@ void pic_set_alt_irq_func(struct hvm_vir
                           void (*alt_irq_func)(void *, int, int),
                           void *alt_irq_opaque)
 {
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     s->alt_irq_func = alt_irq_func;
     s->alt_irq_opaque = alt_irq_opaque;
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 static int intercept_pic_io(ioreq_t *p)
@@ -435,6 +500,7 @@ static int intercept_pic_io(ioreq_t *p)
     struct hvm_virpic  *pic;
     struct vcpu *v = current;
     uint32_t data;
+    unsigned long flags;
     
     if ( p->size != 1 || p->count != 1) {
         printk("PIC_IO wrong access size %d!\n", (int)p->size);
@@ -446,12 +512,16 @@ static int intercept_pic_io(ioreq_t *p)
             hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_IN);
         else
             data = p->u.data;
+	spin_lock_irqsave(&pic->lock, flags);
         pic_ioport_write((void*)&pic->pics[p->addr>>7],
                 (uint32_t) p->addr, (uint32_t) (data & 0xff));
+	spin_unlock_irqrestore(&pic->lock, flags);
     }
     else {
+	spin_lock_irqsave(&pic->lock, flags);
         data = pic_ioport_read(
             (void*)&pic->pics[p->addr>>7], (uint32_t) p->addr);
+	spin_unlock_irqrestore(&pic->lock, flags);
         if(p->pdata_valid) 
             hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_OUT);
         else 
@@ -465,6 +535,7 @@ static int intercept_elcr_io(ioreq_t *p)
     struct hvm_virpic  *s;
     struct vcpu *v = current;
     uint32_t data;
+    unsigned long flags;
     
     if ( p->size != 1 || p->count != 1 ) {
         printk("PIC_IO wrong access size %d!\n", (int)p->size);
@@ -477,10 +548,12 @@ static int intercept_elcr_io(ioreq_t *p)
             hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_IN);
         else
             data = p->u.data;
+	spin_lock_irqsave(&s->lock, flags);
         elcr_ioport_write((void*)&s->pics[p->addr&1],
                 (uint32_t) p->addr, (uint32_t)( data & 0xff));
     	get_sp(current->domain)->sp_global.pic_elcr = 
             s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
+	spin_unlock_irqrestore(&s->lock, flags);
     }
     else {
         data = (u64) elcr_ioport_read(
@@ -512,10 +585,9 @@ int cpu_get_pic_interrupt(struct vcpu *v
     if ( !vlapic_accept_pic_intr(v) )
         return -1;
 
-    if ( !plat->interrupt_request )
-        return -1;
-
-    plat->interrupt_request = 0;
+    if (cmpxchg(&plat->interrupt_request, 1, 0) != 1)
+	return -1;
+
     /* read the irq from the PIC */
     intno = pic_read_irq(s);
     *type = VLAPIC_DELIV_MODE_EXT;
diff -r d056f91cfd95 xen/include/asm-x86/hvm/vpic.h
--- a/xen/include/asm-x86/hvm/vpic.h	Sun May 14 20:13:14 2006 +0100
+++ b/xen/include/asm-x86/hvm/vpic.h	Mon May 15 13:42:12 2006 -0400
@@ -60,6 +60,7 @@ struct hvm_virpic {
     /* IOAPIC callback support */
     void (*alt_irq_func)(void *opaque, int irq_num, int level);
     void *alt_irq_opaque;
+    spinlock_t lock;
 };
 
 
@@ -72,7 +73,7 @@ void pic_set_alt_irq_func(struct hvm_vir
                           void (*alt_irq_func)(void *, int, int),
                           void *alt_irq_opaque);
 int pic_read_irq(struct hvm_virpic *s);
-void pic_update_irq(struct hvm_virpic *s);
+void pic_update_irq(struct hvm_virpic *s);	/* Caller must hold s->lock */
 uint32_t pic_intack_read(struct hvm_virpic *s);
 void register_pic_io_hook (void);
 int cpu_get_pic_interrupt(struct vcpu *v, int *type);


[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] make HVM PIC emulation code SMP-safe
@ 2006-05-16  1:58 Dong, Eddie
  0 siblings, 0 replies; 10+ messages in thread
From: Dong, Eddie @ 2006-05-16  1:58 UTC (permalink / raw)
  To: Dave Lively, xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 2374 bytes --]

Dave:
    I think we'd better to make sure cpu_get_interrupt be called only in VP0 too, but IOAPIC code may need sonsolidation for SMP safe. hvm_pic_assist is safe as it is only happen in VP0.
    When switching from PIC to APIC in SMP, the guest will disable PIC when IOAPIC/APIC is enabled. 
thx,eddie

________________________________

From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Dave Lively
Sent: 2006年5月16日 8:29
To: xen-devel@lists.xensource.com
Subject: Re: [Xen-devel] [PATCH] make HVM PIC emulation code SMP-safe


Resubmit of same with signed-off-by tag ...


On 5/15/06, David Lively < dlively@virtualiron.com <mailto:dlively@virtualiron.com> > wrote: 

	The current PIC emulation code for HVM (fully-virtualized) guests
	is unsafe on SMP hosts.  Guests can access the PIC from any VCPU, 
	though they should be (and generally are) controlling access so that
	at most a single VCPU is accessing the PIC at any time.  However,
	there are two other paths that may access the PIC concurrently with
	a guest VCPU: 
	(1) "the APIC kludge" - cpu_get_interrupt() calls pic_update_irq() to
	      bump slave PIC intrs to the master PIC.  This is called from all
	VCPUS.
	(2) hvm_pic_assist() calls do_pic_irqs[_clear]() - from VCPU 0 only 
	
	With no PIC concurrency controls, SMP HVM guests[1] are unstable (tend
	to hang) under high I/O loads (e.g., the LTP aio-stress & dio tests).
	With the
	PIC concurrency control introduced in the attached patch, SMP HVM guests 
	are considerably more stable.  I have yet to see a hang under heavy I/O
	loads.
	I've tested only 64-bit SMP guests (but this code is unchanged for 32
	bits), always
	passing "noapic" as explained below. 
	
	Dave
	
	[1] Note we're passing "noapic" to the Linux kernels we're testing on SMP
	guests.  This tells Linux to ignore the I/O APIC.  Without this, we lose too
	many hda interrupts for Linux to boot.  The I/O APIC code has many of the 
	same issues as the PIC code.  I have a similar fix for the I/O APICs,
	but since
	it doesn't fix the "lost interrupt" problem I haven't been able to
	adequately test
	it yet.
	
	
	_______________________________________________ 
	Xen-devel mailing list
	Xen-devel@lists.xensource.com
	http://lists.xensource.com/xen-devel
	
	
	
	



[-- Attachment #1.2: Type: text/html, Size: 4097 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] make HVM PIC emulation code SMP-safe
@ 2006-05-16  5:20 Dong, Eddie
  2006-05-16  7:56 ` Keir Fraser
  0 siblings, 1 reply; 10+ messages in thread
From: Dong, Eddie @ 2006-05-16  5:20 UTC (permalink / raw)
  To: Dong, Eddie, Dave Lively, xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 3317 bytes --]

 For the PIC I/O access, do we need lock protect? But for IRQ generation, i.e. cpu_get_interrupt, how about following for simplicity (not tested)?
 
diff -r 1e3977e029fd xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c    Mon May 08 19:21:41 2006 +0100
+++ b/xen/arch/x86/hvm/hvm.c    Tue May 16 13:18:04 2006 +0800
@@ -248,7 +248,7 @@ int cpu_get_interrupt(struct vcpu *v, in
         return intno;
     }
     /* read the irq from the PIC */
-    if ( (intno = cpu_get_pic_interrupt(v, type)) != -1 )
+    if ( v->vcpu_id == 0 && (intno = cpu_get_pic_interrupt(v, type)) != -1 )
         return intno;
 
     return -1;

________________________________

From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Dong, Eddie
Sent: 2006年5月16日 9:58
To: Dave Lively; xen-devel@lists.xensource.com
Subject: RE: [Xen-devel] [PATCH] make HVM PIC emulation code SMP-safe


Dave:
    I think we'd better to make sure cpu_get_interrupt be called only in VP0 too, but IOAPIC code may need sonsolidation for SMP safe. hvm_pic_assist is safe as it is only happen in VP0.
    When switching from PIC to APIC in SMP, the guest will disable PIC when IOAPIC/APIC is enabled. 
thx,eddie

________________________________

From: xen-devel-bounces@lists.xensource.com [mailto:xen-devel-bounces@lists.xensource.com] On Behalf Of Dave Lively
Sent: 2006年5月16日 8:29
To: xen-devel@lists.xensource.com
Subject: Re: [Xen-devel] [PATCH] make HVM PIC emulation code SMP-safe


Resubmit of same with signed-off-by tag ...


On 5/15/06, David Lively < dlively@virtualiron.com <mailto:dlively@virtualiron.com> > wrote: 

	The current PIC emulation code for HVM (fully-virtualized) guests
	is unsafe on SMP hosts.  Guests can access the PIC from any VCPU, 
	though they should be (and generally are) controlling access so that
	at most a single VCPU is accessing the PIC at any time.  However,
	there are two other paths that may access the PIC concurrently with
	a guest VCPU: 
	(1) "the APIC kludge" - cpu_get_interrupt() calls pic_update_irq() to
	      bump slave PIC intrs to the master PIC.  This is called from all
	VCPUS.
	(2) hvm_pic_assist() calls do_pic_irqs[_clear]() - from VCPU 0 only 
	
	With no PIC concurrency controls, SMP HVM guests[1] are unstable (tend
	to hang) under high I/O loads (e.g., the LTP aio-stress & dio tests).
	With the
	PIC concurrency control introduced in the attached patch, SMP HVM guests 
	are considerably more stable.  I have yet to see a hang under heavy I/O
	loads.
	I've tested only 64-bit SMP guests (but this code is unchanged for 32
	bits), always
	passing "noapic" as explained below. 
	
	Dave
	
	[1] Note we're passing "noapic" to the Linux kernels we're testing on SMP
	guests.  This tells Linux to ignore the I/O APIC.  Without this, we lose too
	many hda interrupts for Linux to boot.  The I/O APIC code has many of the 
	same issues as the PIC code.  I have a similar fix for the I/O APICs,
	but since
	it doesn't fix the "lost interrupt" problem I haven't been able to
	adequately test
	it yet.
	
	
	_______________________________________________ 
	Xen-devel mailing list
	Xen-devel@lists.xensource.com
	http://lists.xensource.com/xen-devel
	
	
	
	



[-- Attachment #1.2: Type: text/html, Size: 5900 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] make HVM PIC emulation code SMP-safe
  2006-05-16  5:20 Dong, Eddie
@ 2006-05-16  7:56 ` Keir Fraser
  2006-05-16 15:05   ` Dave Lively
  0 siblings, 1 reply; 10+ messages in thread
From: Keir Fraser @ 2006-05-16  7:56 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: xen-devel, Dave Lively


Well, that would make it behave like a *real* PIC, wired through to 
CPU0's INT pin, right? So it sounds good to me, especially if we are 
currently spraying PIC interrupts across all VCPUs. And it's even 
better if it avoids requiring locking on the vmentry path.

  -- Keir

On 16 May 2006, at 06:20, Dong, Eddie wrote:

>  For the PIC I/O access, do we need lock protect? But for IRQ 
> generation, i.e. cpu_get_interrupt, how about following for simplicity 
> (not tested)?
>  
> diff -r 1e3977e029fd xen/arch/x86/hvm/hvm.c
> --- a/xen/arch/x86/hvm/hvm.c    Mon May 08 19:21:41 2006 +0100
> +++ b/xen/arch/x86/hvm/hvm.c    Tue May 16 13:18:04 2006 +0800
> @@ -248,7 +248,7 @@ int cpu_get_interrupt(struct vcpu *v, in
>          return intno;
>      }
>      /* read the irq from the PIC */
> -    if ( (intno = cpu_get_pic_interrupt(v, type)) != -1 )
> +    if ( v->vcpu_id == 0 && (intno = cpu_get_pic_interrupt(v, type)) 
> != -1 )
>          return intno;
>  
>      return -1;

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] make HVM PIC emulation code SMP-safe
  2006-05-16  7:56 ` Keir Fraser
@ 2006-05-16 15:05   ` Dave Lively
  0 siblings, 0 replies; 10+ messages in thread
From: Dave Lively @ 2006-05-16 15:05 UTC (permalink / raw)
  To: Keir Fraser; +Cc: xen-devel, Dong, Eddie


[-- Attachment #1.1: Type: text/plain, Size: 2485 bytes --]

Hi Keir / Eddie -

  I was also thinking cpu_get_pic_interrupt() should be called only on
VCPU 0 (and in fact, originally tested with restriction).  But since I
wasn't sure the restriction was necessary (and things worked fine
without it), I removed it.  I'll put it back in now, in light of these
comments.  (Revised patch attached.)

  However, that doesn't remove the need for locking (or some sort
of PIC concurrency control).  First (and most importantly), the guest
can access the PIC from *any* VCPU, though it must be careful to
access it from at most one VCPU at once.  This alone means it can
conflict with hypervisor PIC access from VCPU 0.  (Note that the
"APIC kludge" causes PIC acccess from arbitrary VCPUs.  So this
patch fixes that as well.  But the patch would be necessary even without
this kludge.)

  I'm not wild about having locks on this path either.  A safe lockless
protocol may be possible, but I don't have the time to try to work one
out now.  In our experience, this patch greatly improves the stability
of SMP guests.  (But note we currently tell SMP guest kernels to
ignore the I/O APIC, otherwise we lose too many hda interrupts.)

Dave

P.S. The I/O APIC code has similar issues, for mostly the same
reasons (guests can access from any VCPU).  I have a similar patch
for it, but haven't been able to test it yet (due to other I/O APIC
problems).

On 5/16/06, Keir Fraser <Keir.Fraser@cl.cam.ac.uk> wrote:
>
>
> Well, that would make it behave like a *real* PIC, wired through to
> CPU0's INT pin, right? So it sounds good to me, especially if we are
> currently spraying PIC interrupts across all VCPUs. And it's even
> better if it avoids requiring locking on the vmentry path.
>
>   -- Keir
>
> On 16 May 2006, at 06:20, Dong, Eddie wrote:
>
> > For the PIC I/O access, do weneed lock protect? But for IRQ
> > generation, i.e. cpu_get_interrupt, how about following for simplicity
> > (not tested)?
> >
> > diff -r 1e3977e029fd xen/arch/x86/hvm/hvm.c
> > --- a/xen/arch/x86/hvm/hvm.c Mon May 08 19:21:41 2006 +0100
> > +++ b/xen/arch/x86/hvm/hvm.c Tue May 16 13:18:04 2006 +0800
> > @@ -248,7 +248,7 @@ int cpu_get_interrupt(struct vcpu *v, in
> > return intno;
> > }
> > /* read the irq from the PIC */
> > - if ( (intno = cpu_get_pic_interrupt(v, type)) != -1 )
> > + if ( v->vcpu_id == 0 && (intno = cpu_get_pic_interrupt(v, type))
> > != -1 )
> > return intno;
> >
> > return -1;
>
>

[-- Attachment #1.2: Type: text/html, Size: 3048 bytes --]

[-- Attachment #2: vpic-smp-safety.patch --]
[-- Type: text/x-patch, Size: 11566 bytes --]

Signed-off-by: David Lively <dlively@virtualiron.com>

diff -r d056f91cfd95 xen/arch/x86/hvm/hvm.c
--- a/xen/arch/x86/hvm/hvm.c	Sun May 14 20:13:14 2006 +0100
+++ b/xen/arch/x86/hvm/hvm.c	Tue May 16 11:02:59 2006 -0400
@@ -240,15 +240,18 @@ int cpu_get_interrupt(struct vcpu *v, in
 {
     int intno;
     struct hvm_virpic *s = &v->domain->arch.hvm_domain.vpic;
+    unsigned long flags;
 
     if ( (intno = cpu_get_apic_interrupt(v, type)) != -1 ) {
         /* set irq request if a PIC irq is still pending */
         /* XXX: improve that */
+        spin_lock_irqsave(&s->lock, flags);
         pic_update_irq(s);
+        spin_unlock_irqrestore(&s->lock, flags);
         return intno;
     }
     /* read the irq from the PIC */
-    if ( (intno = cpu_get_pic_interrupt(v, type)) != -1 )
+    if ( v->vcpu_id == 0 && (intno = cpu_get_pic_interrupt(v, type)) != -1 )
         return intno;
 
     return -1;
diff -r d056f91cfd95 xen/arch/x86/hvm/i8259.c
--- a/xen/arch/x86/hvm/i8259.c	Sun May 14 20:13:14 2006 +0100
+++ b/xen/arch/x86/hvm/i8259.c	Tue May 16 11:02:59 2006 -0400
@@ -35,9 +35,13 @@
 #include <asm/current.h>
 
 /* set irq level. If an edge is detected, then the IRR is set to 1 */
+/* Caller must hold vpic lock */
 static inline void pic_set_irq1(PicState *s, int irq, int level)
 {
     int mask;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     mask = 1 << irq;
     if (s->elcr & mask) {
         /* level triggered */
@@ -63,9 +67,13 @@ static inline void pic_set_irq1(PicState
 
 /* return the highest priority found in mask (highest = smallest
    number). Return 8 if no irq */
+/* Caller must hold vpic lock */
 static inline int get_priority(PicState *s, int mask)
 {
     int priority;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     if (mask == 0)
         return 8;
     priority = 0;
@@ -75,9 +83,12 @@ static inline int get_priority(PicState 
 }
 
 /* return the pic wanted interrupt. return -1 if none */
+/* Caller must hold vpic lock */
 static int pic_get_irq(PicState *s)
 {
     int mask, cur_priority, priority;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     mask = s->irr & ~s->imr;
     priority = get_priority(s, mask);
@@ -101,9 +112,12 @@ static int pic_get_irq(PicState *s)
 /* raise irq to CPU if necessary. must be called every time the active
    irq may change */
 /* XXX: should not export it, but it is needed for an APIC kludge */
+/* Caller must hold vpic lock */
 void pic_update_irq(struct hvm_virpic *s)
 {
     int irq2, irq;
+
+    BUG_ON(!spin_is_locked(&s->lock));
 
     /* first look at slave pic */
     irq2 = pic_get_irq(&s->pics[1]);
@@ -122,29 +136,40 @@ void pic_set_irq_new(void *opaque, int i
 void pic_set_irq_new(void *opaque, int irq, int level)
 {
     struct hvm_virpic *s = opaque;
-
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     hvm_vioapic_set_irq(current->domain, irq, level);
     pic_set_irq1(&s->pics[irq >> 3], irq & 7, level);
     /* used for IOAPIC irqs */
     if (s->alt_irq_func)
         s->alt_irq_func(s->alt_irq_opaque, irq, level);
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 void do_pic_irqs (struct hvm_virpic *s, uint16_t irqs)
 {
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     s->pics[1].irr |= (uint8_t)(irqs >> 8);
     s->pics[0].irr |= (uint8_t) irqs;
     hvm_vioapic_do_irqs(current->domain, irqs);
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 void do_pic_irqs_clear (struct hvm_virpic *s, uint16_t irqs)
 {
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     s->pics[1].irr &= ~(uint8_t)(irqs >> 8);
     s->pics[0].irr &= ~(uint8_t) irqs;
     hvm_vioapic_do_irqs_clear(current->domain, irqs);
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 /* obsolete function */
@@ -154,8 +179,11 @@ void pic_set_irq(struct hvm_virpic *isa_
 }
 
 /* acknowledge interrupt 'irq' */
+/* Caller must hold vpic lock */
 static inline void pic_intack(PicState *s, int irq)
 {
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     if (s->auto_eoi) {
         if (s->rotate_on_auto_eoi)
             s->priority_add = (irq + 1) & 7;
@@ -170,7 +198,9 @@ int pic_read_irq(struct hvm_virpic *s)
 int pic_read_irq(struct hvm_virpic *s)
 {
     int irq, irq2, intno;
-
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     irq = pic_get_irq(&s->pics[0]);
     if (irq >= 0) {
         pic_intack(&s->pics[0], irq);
@@ -194,13 +224,17 @@ int pic_read_irq(struct hvm_virpic *s)
         intno = s->pics[0].irq_base + irq;
     }
     pic_update_irq(s);
+    spin_unlock_irqrestore(&s->lock, flags);
         
     return intno;
 }
 
+/* Caller must hold vpic lock */
 static void update_shared_irr(struct hvm_virpic *s, PicState *c)
 {
     uint8_t *pl, *pe;
+
+    BUG_ON(!spin_is_locked(&s->lock));
 
     get_sp(current->domain)->sp_global.pic_elcr = 
 		s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
@@ -216,9 +250,12 @@ static void update_shared_irr(struct hvm
     }
 }
 
+/* Caller must hold vpic lock */
 static void pic_reset(void *opaque)
 {
     PicState *s = opaque;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     s->last_irr = 0;
     s->irr = 0;
@@ -237,10 +274,13 @@ static void pic_reset(void *opaque)
     s->elcr = 0;
 }
 
+/* Caller must hold vpic lock */
 static void pic_ioport_write(void *opaque, uint32_t addr, uint32_t val)
 {
     PicState *s = opaque;
     int priority, cmd, irq;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     addr &= 1;
     if (addr == 0) {
@@ -328,9 +368,12 @@ static void pic_ioport_write(void *opaqu
     }
 }
 
+/* Caller must hold vpic lock */
 static uint32_t pic_poll_read (PicState *s, uint32_t addr1)
 {
     int ret;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     ret = pic_get_irq(s);
     if (ret >= 0) {
@@ -350,11 +393,14 @@ static uint32_t pic_poll_read (PicState 
     return ret;
 }
 
+/* Caller must hold vpic lock */
 static uint32_t pic_ioport_read(void *opaque, uint32_t addr1)
 {
     PicState *s = opaque;
     unsigned int addr;
     int ret;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
 
     addr = addr1;
     addr &= 1;
@@ -375,23 +421,30 @@ static uint32_t pic_ioport_read(void *op
 }
 
 /* memory mapped interrupt status */
-/* XXX: may be the same than pic_read_irq() */
+/* XXX: may be the same than pic_read_rq() */
 uint32_t pic_intack_read(struct hvm_virpic *s)
 {
     int ret;
-
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     ret = pic_poll_read(&s->pics[0], 0x00);
     if (ret == 2)
         ret = pic_poll_read(&s->pics[1], 0x80) + 8;
     /* Prepare for ISR read */
     s->pics[0].read_reg_select = 1;
+    spin_unlock_irqrestore(&s->lock, flags);
     
     return ret;
 }
 
 static void elcr_ioport_write(void *opaque, uint32_t addr, uint32_t val)
+/* Caller must hold vpic lock */
 {
     PicState *s = opaque;
+
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     s->elcr = val & s->elcr_mask;
 }
 
@@ -402,23 +455,31 @@ static uint32_t elcr_ioport_read(void *o
 }
 
 /* XXX: add generic master/slave system */
+/* Caller must hold vpic lock */
 static void pic_init1(int io_addr, int elcr_addr, PicState *s)
 {
+    BUG_ON(!spin_is_locked(&s->pics_state->lock));
+
     pic_reset(s);
 }
 
 void pic_init(struct hvm_virpic *s, void (*irq_request)(void *, int),
               void *irq_request_opaque)
 {
+    unsigned long flags;
+
     memset(s, 0, sizeof(*s));
+    spin_lock_init(&s->lock);
+    s->pics[0].pics_state = s;
+    s->pics[1].pics_state = s;
+    spin_lock_irqsave(&s->lock, flags);
     pic_init1(0x20, 0x4d0, &s->pics[0]);
     pic_init1(0xa0, 0x4d1, &s->pics[1]);
+    spin_unlock_irqrestore(&s->lock, flags);
     s->pics[0].elcr_mask = 0xf8;
     s->pics[1].elcr_mask = 0xde;
     s->irq_request = irq_request;
     s->irq_request_opaque = irq_request_opaque;
-    s->pics[0].pics_state = s;
-    s->pics[1].pics_state = s;
     return; 
 }
 
@@ -426,8 +487,12 @@ void pic_set_alt_irq_func(struct hvm_vir
                           void (*alt_irq_func)(void *, int, int),
                           void *alt_irq_opaque)
 {
+    unsigned long flags;
+
+    spin_lock_irqsave(&s->lock, flags);
     s->alt_irq_func = alt_irq_func;
     s->alt_irq_opaque = alt_irq_opaque;
+    spin_unlock_irqrestore(&s->lock, flags);
 }
 
 static int intercept_pic_io(ioreq_t *p)
@@ -435,6 +500,7 @@ static int intercept_pic_io(ioreq_t *p)
     struct hvm_virpic  *pic;
     struct vcpu *v = current;
     uint32_t data;
+    unsigned long flags;
     
     if ( p->size != 1 || p->count != 1) {
         printk("PIC_IO wrong access size %d!\n", (int)p->size);
@@ -446,12 +512,16 @@ static int intercept_pic_io(ioreq_t *p)
             hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_IN);
         else
             data = p->u.data;
+	spin_lock_irqsave(&pic->lock, flags);
         pic_ioport_write((void*)&pic->pics[p->addr>>7],
                 (uint32_t) p->addr, (uint32_t) (data & 0xff));
+	spin_unlock_irqrestore(&pic->lock, flags);
     }
     else {
+	spin_lock_irqsave(&pic->lock, flags);
         data = pic_ioport_read(
             (void*)&pic->pics[p->addr>>7], (uint32_t) p->addr);
+	spin_unlock_irqrestore(&pic->lock, flags);
         if(p->pdata_valid) 
             hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_OUT);
         else 
@@ -465,6 +535,7 @@ static int intercept_elcr_io(ioreq_t *p)
     struct hvm_virpic  *s;
     struct vcpu *v = current;
     uint32_t data;
+    unsigned long flags;
     
     if ( p->size != 1 || p->count != 1 ) {
         printk("PIC_IO wrong access size %d!\n", (int)p->size);
@@ -477,10 +548,12 @@ static int intercept_elcr_io(ioreq_t *p)
             hvm_copy(&data, (unsigned long)p->u.pdata, p->size, HVM_COPY_IN);
         else
             data = p->u.data;
+	spin_lock_irqsave(&s->lock, flags);
         elcr_ioport_write((void*)&s->pics[p->addr&1],
                 (uint32_t) p->addr, (uint32_t)( data & 0xff));
     	get_sp(current->domain)->sp_global.pic_elcr = 
             s->pics[0].elcr | ((u16)s->pics[1].elcr << 8);
+	spin_unlock_irqrestore(&s->lock, flags);
     }
     else {
         data = (u64) elcr_ioport_read(
@@ -512,10 +585,9 @@ int cpu_get_pic_interrupt(struct vcpu *v
     if ( !vlapic_accept_pic_intr(v) )
         return -1;
 
-    if ( !plat->interrupt_request )
-        return -1;
-
-    plat->interrupt_request = 0;
+    if (cmpxchg(&plat->interrupt_request, 1, 0) != 1)
+	return -1;
+
     /* read the irq from the PIC */
     intno = pic_read_irq(s);
     *type = VLAPIC_DELIV_MODE_EXT;
diff -r d056f91cfd95 xen/include/asm-x86/hvm/vpic.h
--- a/xen/include/asm-x86/hvm/vpic.h	Sun May 14 20:13:14 2006 +0100
+++ b/xen/include/asm-x86/hvm/vpic.h	Tue May 16 11:02:59 2006 -0400
@@ -60,6 +60,7 @@ struct hvm_virpic {
     /* IOAPIC callback support */
     void (*alt_irq_func)(void *opaque, int irq_num, int level);
     void *alt_irq_opaque;
+    spinlock_t lock;
 };
 
 
@@ -72,7 +73,7 @@ void pic_set_alt_irq_func(struct hvm_vir
                           void (*alt_irq_func)(void *, int, int),
                           void *alt_irq_opaque);
 int pic_read_irq(struct hvm_virpic *s);
-void pic_update_irq(struct hvm_virpic *s);
+void pic_update_irq(struct hvm_virpic *s);	/* Caller must hold s->lock */
 uint32_t pic_intack_read(struct hvm_virpic *s);
 void register_pic_io_hook (void);
 int cpu_get_pic_interrupt(struct vcpu *v, int *type);

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] make HVM PIC emulation code SMP-safe
@ 2006-05-17  6:01 Dong, Eddie
  2006-05-17  7:07 ` Keir Fraser
  2006-05-18 15:16 ` Dave Lively
  0 siblings, 2 replies; 10+ messages in thread
From: Dong, Eddie @ 2006-05-17  6:01 UTC (permalink / raw)
  To: Dave Lively, Keir Fraser; +Cc: xen-devel


[-- Attachment #1.1: Type: text/plain, Size: 4085 bytes --]

Dave:
    Looks like this patch is just using "BUG_ON" for APIs like pic_ioport_read.
    Also let me share something about 1) HVM virtual Interrupt Controller movement in consideration. When APIC is enabled for SMP, guest software need to disable PIC for correct interrupt generation mechanism. If guest bios is presenting MPS, then we can know this through IMCR base on MPS spec when guest switch from PIC mode to Symmetric I/O Mode. If guest bios is presenting ACPI, the guest software will disable PIC by writing 0xff to master PIC IMR. Current VMX guest implements ACPI.
    In either way, when Xen detects the guest has disabled PIC, the PIC IRQ generation logic can be removed for both performance and simplicity. So even VP0 doesn't need to get/queue interrupt from PIC. The ideal solution may be that we need to define some registable APIs in HVM virtual weired interrupt controller logic. At startup time, we regieter PIC APIs, and later on replace it with APIC's (the transition time need special concern while it is non performance critical path).
    Anyway, 2) PIC I/O access may need to be SMP safe (maybe a big lock for simplicity), 3) IOAPIC needs to be SMP safe. Then we try to keep minimal changes to device models with that in Qemu (yes we already have some) for future maintain effort. You are very welcome here.
    thx,eddie

________________________________

From: Dave Lively [mailto:dave.lively@gmail.com] 
Sent: 2006年5月16日 23:06
To: Keir Fraser
Cc: Dong, Eddie; xen-devel@lists.xensource.com
Subject: Re: [Xen-devel] [PATCH] make HVM PIC emulation code SMP-safe


Hi Keir / Eddie -

  I was also thinking cpu_get_pic_interrupt() should be called only on
VCPU 0 (and in fact, originally tested with restriction).  But since I
wasn't sure the restriction was necessary (and things worked fine
without it), I removed it.  I'll put it back in now, in light of these
comments.  (Revised patch attached.)

  However, that doesn't remove the need for locking (or some sort
of PIC concurrency control).  First (and most importantly), the guest
can access the PIC from *any* VCPU, though it must be careful to
access it from at most one VCPU at once.  This alone means it can
conflict with hypervisor PIC access from VCPU 0.  (Note that the
"APIC kludge" causes PIC acccess from arbitrary VCPUs.  So this
patch fixes that as well.  But the patch would be necessary even without
this kludge.)

  I'm not wild about having locks on this path either.  A safe lockless
protocol may be possible, but I don't have the time to try to work one
out now.  In our experience, this patch greatly improves the stability 
of SMP guests.  (But note we currently tell SMP guest kernels to 
ignore the I/O APIC, otherwise we lose too many hda interrupts.)

Dave

P.S. The I/O APIC code has similar issues, for mostly the same
reasons (guests can access from any VCPU).  I have a similar patch
for it, but haven't been able to test it yet (due to other I/O APIC 
problems).


On 5/16/06, Keir Fraser <Keir.Fraser@cl.cam.ac.uk> wrote: 


	Well, that would make it behave like a *real* PIC, wired through to
	CPU0's INT pin, right? So it sounds good to me, especially if we are
	currently spraying PIC interrupts across all VCPUs. And it's even
	better if it avoids requiring locking on the vmentry path. 
	
	  -- Keir
	
	On 16 May 2006, at 06:20, Dong, Eddie wrote:
	
	> For the PIC I/O access, do weneed lock protect? But for IRQ
	> generation, i.e. cpu_get_interrupt, how about following for simplicity
	> (not tested)?
	> 
	> diff -r 1e3977e029fd xen/arch/x86/hvm/hvm.c
	> --- a/xen/arch/x86/hvm/hvm.c Mon May 08 19:21:41 2006 +0100
	> +++ b/xen/arch/x86/hvm/hvm.c Tue May 16 13:18:04 2006 +0800
	> @@ -248,7 +248,7 @@ int cpu_get_interrupt(struct vcpu *v, in 
	> return intno;
	> }
	> /* read the irq from the PIC */
	> - if ( (intno = cpu_get_pic_interrupt(v, type)) != -1 )
	> + if ( v->vcpu_id == 0 && (intno = cpu_get_pic_interrupt(v, type)) 
	> != -1 )
	> return intno;
	> 
	> return -1;
	
	



[-- Attachment #1.2: Type: text/html, Size: 6136 bytes --]

[-- Attachment #2: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] make HVM PIC emulation code SMP-safe
  2006-05-17  6:01 Dong, Eddie
@ 2006-05-17  7:07 ` Keir Fraser
  2006-05-18 15:16 ` Dave Lively
  1 sibling, 0 replies; 10+ messages in thread
From: Keir Fraser @ 2006-05-17  7:07 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: xen-devel, Dave Lively


On 17 May 2006, at 07:01, Dong, Eddie wrote:

>     In either way, when Xen detects the guest has disabled PIC, the 
> PIC IRQ generation logic can be removed for both performance and 
> simplicity. So even VP0 doesn't need to get/queue interrupt from PIC. 
> The ideal solution may be that we need to define some registable APIs 
> in HVM virtual weired interrupt controller logic. At startup time, we 
> regieter PIC APIs, and later on replace it with APIC's (the transition 
> time need special concern while it is non performance critical path).

Don;t some OSes (e.g., older Linux) expect to be able to keep some 
interrupts routed through 8259 and then have the output of that plumbed 
to an IOAPIC pin? Or can they always fall back from that (is there real 
hardware that isn't able to route 8259 via an IOAPIC)?

  -- Keir

>     Anyway, 2) PIC I/O access may need to be SMP safe (maybe a big 
> lock for simplicity), 3) IOAPIC needs to be SMP safe. Then we try to 
> keep minimal changes to device models with that in Qemu (yes we 
> already have some) for future maintain effort. You are very welcome 
> here.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] make HVM PIC emulation code SMP-safe
  2006-05-17  6:01 Dong, Eddie
  2006-05-17  7:07 ` Keir Fraser
@ 2006-05-18 15:16 ` Dave Lively
  1 sibling, 0 replies; 10+ messages in thread
From: Dave Lively @ 2006-05-18 15:16 UTC (permalink / raw)
  To: Dong, Eddie; +Cc: xen-devel

On 5/17/06, Dong, Eddie <eddie.dong@intel.com> wrote:
> Dave:
>     Looks like this patch is just using "BUG_ON" for APIs like
> pic_ioport_read.

Note that pic_ioport_read/write are internal to the PIC emulation.
But I see your point.  The locking could be moved inside
pic_ioport_read/write.  This would make sense because a read/write to
a PIC I/O port is certainly at atomic event (from the CPUs'
perspective) on real hardware.  (The current implementation
accomplishes the same thing by locking in intercept_pic_io, the only
caller of pic_ioport_read/write.  But I think your suggestion makes
the code easier to understand.)

>     Also let me share something about 1) HVM virtual Interrupt Controller
> movement in consideration. When APIC is enabled for SMP, guest software need
> to disable PIC for correct interrupt generation mechanism. If guest bios is
> presenting MPS, then we can know this through IMCR base on MPS spec when
> guest switch from PIC mode to Symmetric I/O Mode. If guest bios is
> presenting ACPI, the guest software will disable PIC by writing 0xff to
> master PIC IMR. Current VMX guest implements ACPI.

Right.  Currently I tell our guest OS (Linux 2.6)  *not* to use the
I/O APIC, because I end up losing hd interrupts on SMP guests when I
use the I/O APIC.  (But I do enable APIC in the hvm builder because
SMP Linux needs the local APICs for IPIs,)  So because of this, our
guest software is never disabling the (8259) PIC.

>     In either way, when Xen detects the guest has disabled PIC, the PIC IRQ
> generation logic can be removed for both performance and simplicity. So even
> VP0 doesn't need to get/queue interrupt from PIC. The ideal solution may be
> that we need to define some registable APIs in HVM virtual weired interrupt
> controller logic. At startup time, we regieter PIC APIs, and later on
> replace it with APIC's (the transition time need special concern while it is
> non performance critical path).

I like this idea.

>     Anyway, 2) PIC I/O access may need to be SMP safe (maybe a big lock for
> simplicity), 3) IOAPIC needs to be SMP safe. Then we try to keep minimal
> changes to device models with that in Qemu (yes we already have some) for
> future maintain effort. You are very welcome here.

As I mentioned, I have a very similar patch to make the IOAPIC code
SMP safe.  But since (even with these changes) I still see a huge
number of lost hda interrupts when using the IOAPIC on SMP guests, I
haven't been able to test it yet.  I assume others see the same
problems with the IOAPIC??   (I'll be diving into this soon --
probably tonight or tomorrow.  At this point I have no clue what's
going wrong.)

Dave

^ permalink raw reply	[flat|nested] 10+ messages in thread

* RE: [PATCH] make HVM PIC emulation code SMP-safe
@ 2006-05-18 15:50 Jiang, Yunhong
  0 siblings, 0 replies; 10+ messages in thread
From: Jiang, Yunhong @ 2006-05-18 15:50 UTC (permalink / raw)
  To: Dave Lively, Dong, Eddie; +Cc: xen-devel

 
>As I mentioned, I have a very similar patch to make the IOAPIC code
>SMP safe.  But since (even with these changes) I still see a huge
>number of lost hda interrupts when using the IOAPIC on SMP guests, I
>haven't been able to test it yet.  I assume others see the same
>problems with the IOAPIC??   (I'll be diving into this soon --
>probably tonight or tomorrow.  At this point I have no clue what's
>going wrong.)

On which situation will the IOAPIC has a lot of hd lost interrupt?

What's the guest kernel version are you using? I remember some old
version kernel has problem.

Also there is a bug on the round robin code.Current code will always
leads interrupt to vcpu 0.
Followed is the fix for it. But this fix cause problem for timer
interrupt, I'm not sure the cause, but I suspect it is because the timer
is injected in flood.

The below fix is based one of my another APIC patch , so not sure if you
can apply it directly, but I think you can figure out the changes
easily.

Thanks
Yunhong Jiang 

diff -r 86d8246c6aff xen/arch/x86/hvm/vlapic.c
--- a/xen/arch/x86/hvm/vlapic.c Wed May 17 23:15:36 2006 +0100
+++ b/xen/arch/x86/hvm/vlapic.c Thu May 18 22:30:06 2006 +0800
@@ -308,8 +308,15 @@ struct vlapic* apic_round_robin(struct d

     old = next = d->arch.hvm_domain.round_info[vector];

-    do {
-        /* the vcpu array is arranged according to vcpu_id */
+    /* the vcpu array is arranged according to vcpu_id */
+    do
+    {
+        next ++;
+        if ( !d->vcpu[next] ||
+          !test_bit(_VCPUF_initialised, &d->vcpu[next]->vcpu_flags) ||
+          next == MAX_VIRT_CPUS )
+            next = 0;
+
         if ( test_bit(next, &bitmap) )
         {
             target = d->vcpu[next]->arch.hvm_vcpu.vlapic;
@@ -321,12 +328,6 @@ struct vlapic* apic_round_robin(struct d
             }
             break;
         }
-
-        next ++;
-        if ( !d->vcpu[next] ||
-             !test_bit(_VCPUF_initialised, &d->vcpu[next]->vcpu_flags)
||
-             next == MAX_VIRT_CPUS )
-            next = 0;
     } while ( next != old );

     d->arch.hvm_domain.round_info[vector] = next;
~


>
>Dave
>
>_______________________________________________
>Xen-devel mailing list
>Xen-devel@lists.xensource.com
>http://lists.xensource.com/xen-devel
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2006-05-18 15:50 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-05-15 19:11 [PATCH] make HVM PIC emulation code SMP-safe David Lively
2006-05-16  0:29 ` Dave Lively
  -- strict thread matches above, loose matches on Subject: below --
2006-05-16  1:58 Dong, Eddie
2006-05-16  5:20 Dong, Eddie
2006-05-16  7:56 ` Keir Fraser
2006-05-16 15:05   ` Dave Lively
2006-05-17  6:01 Dong, Eddie
2006-05-17  7:07 ` Keir Fraser
2006-05-18 15:16 ` Dave Lively
2006-05-18 15:50 Jiang, Yunhong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.