From mboxrd@z Thu Jan 1 00:00:00 1970 From: "Jan Beulich" Subject: [PATCH] Re: Xen crash on dom0 shutdown Date: Wed, 24 Sep 2008 09:59:15 +0100 Message-ID: <48DA1D83.76E4.0078.0@novell.com> References: <48D8E23D.76E4.0078.0@novell.com> Mime-Version: 1.0 Content-Type: text/plain; charset=US-ASCII Content-Transfer-Encoding: quoted-printable Return-path: In-Reply-To: Content-Disposition: inline List-Unsubscribe: , List-Post: List-Help: List-Subscribe: , Sender: xen-devel-bounces@lists.xensource.com Errors-To: xen-devel-bounces@lists.xensource.com To: Keir Fraser Cc: Yunhong Jiang , Haitao Shan , xen-devel@lists.xensource.com List-Id: xen-devel@lists.xenproject.org >>> Keir Fraser 23.09.08 13:27 >>> >On 23/9/08 11:34, "Jan Beulich" wrote: > >> Simply removing the BUG_ON() seems inappropriate, though. But I'm >> uncertain whether it would be reasonable to call pirq_guest_unbind() >> instead of the BUG_ON(), and if so, how to properly deal with >> irq_desc[vector].lock (the immediate idea would be to factor out the >> locking into a wrapper function, but an alternative would be to use >> recursive locks, and perhaps there are other possibilities). > >Well, this hypercall doesn't do pirq_guest_unbind() on IO-APIC-routed >interrupts either, so I think the problem is wider ranging than just MSI >interrupts. Consider unmap_irq() followed by pirq_guest_unbind() later. >We'll BUG_ON(vector =3D=3D 0) in the latter function. Looks a bit of a = mess to >me. I would say that if there are bindings remaining we should re-direct = the >event-channel to a 'no-op' pirq (e.g., -1) and indeed call >pirq_guest_unbind() or similar. How about this one? It doesn't exactly follow the path you suggested, i.e. doesn't mess with event channels, but rather marks the irq<->vector mapping as invalid, allowing only a subsequent event channel unbind (i.e. close) to recover from that state (which seems better in terms of requiring proper discipline in the guest, as it prevents re-using the irq or vector without properly cleaning up). Signed-off-by: Jan Beulich Index: 2008-09-19/xen/arch/x86/io_apic.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/arch/x86/io_apic.c 2008-09-17 09:26:41.0000000= 00 +0200 +++ 2008-09-19/xen/arch/x86/io_apic.c 2008-09-24 09:19:41.000000000 = +0200 @@ -721,7 +721,6 @@ next: =20 static struct hw_interrupt_type ioapic_level_type; static struct hw_interrupt_type ioapic_edge_type; -struct hw_interrupt_type pci_msi_type; =20 #define IOAPIC_AUTO -1 #define IOAPIC_EDGE 0 Index: 2008-09-19/xen/arch/x86/irq.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/arch/x86/irq.c 2008-09-24 09:17:33.000000000 = +0200 +++ 2008-09-19/xen/arch/x86/irq.c 2008-09-23 15:26:26.000000000 = +0200 @@ -343,6 +343,8 @@ static void __pirq_guest_eoi(struct doma int vector; =20 vector =3D domain_irq_to_vector(d, irq); + if ( vector < 0 ) + return; desc =3D &irq_desc[vector]; action =3D (irq_guest_action_t *)desc->action; =20 @@ -418,7 +420,7 @@ int pirq_acktype(struct domain *d, int i unsigned int vector; =20 vector =3D domain_irq_to_vector(d, irq); - if ( vector =3D=3D 0 ) + if ( vector <=3D 0 ) return ACKTYPE_NONE; =20 desc =3D &irq_desc[vector]; @@ -469,7 +471,7 @@ int pirq_shared(struct domain *d, int ir int shared; =20 vector =3D domain_irq_to_vector(d, irq); - if ( vector =3D=3D 0 ) + if ( vector <=3D 0 ) return 0; =20 desc =3D &irq_desc[vector]; @@ -493,7 +495,7 @@ int pirq_guest_bind(struct vcpu *v, int=20 =20 retry: vector =3D domain_irq_to_vector(v->domain, irq); - if ( vector =3D=3D 0 ) + if ( vector <=3D 0 ) return -EINVAL; =20 desc =3D &irq_desc[vector]; @@ -575,20 +577,15 @@ int pirq_guest_bind(struct vcpu *v, int=20 return rc; } =20 -void pirq_guest_unbind(struct domain *d, int irq) +void __pirq_guest_unbind(struct domain *d, int irq, unsigned int vector, + unsigned long flags) { - unsigned int vector; - irq_desc_t *desc; + irq_desc_t *desc =3D &irq_desc[vector]; irq_guest_action_t *action; cpumask_t cpu_eoi_map; - unsigned long flags; int i; =20 - vector =3D domain_irq_to_vector(d, irq); - desc =3D &irq_desc[vector]; - BUG_ON(vector =3D=3D 0); - - spin_lock_irqsave(&desc->lock, flags); + ASSERT(spin_is_locked(&desc->lock)); =20 action =3D (irq_guest_action_t *)desc->action; =20 @@ -626,7 +623,7 @@ void pirq_guest_unbind(struct domain *d, BUG_ON(test_bit(irq, d->pirq_mask)); =20 if ( action->nr_guests !=3D 0 ) - goto out; + return; =20 BUG_ON(action->in_flight !=3D 0); =20 @@ -659,9 +656,26 @@ void pirq_guest_unbind(struct domain *d, desc->status &=3D ~IRQ_INPROGRESS; kill_timer(&irq_guest_eoi_timer[vector]); desc->handler->shutdown(vector); +} =20 - out: - spin_unlock_irqrestore(&desc->lock, flags); =20 +void pirq_guest_unbind(struct domain *d, int irq) +{ + int vector =3D domain_irq_to_vector(d, irq); + unsigned long flags; + + if ( likely(vector > 0) ) + { + spin_lock_irqsave(&irq_desc[vector].lock, flags); + __pirq_guest_unbind(d, irq, vector, flags); + spin_unlock_irqrestore(&irq_desc[vector].lock, flags); + } + else + { + BUG_ON(vector =3D=3D 0); + spin_lock_irqsave(&d->arch.irq_lock, flags); + d->arch.pirq_vector[irq] =3D d->arch.vector_pirq[-vector] =3D 0; + spin_unlock_irqrestore(&d->arch.irq_lock, flags); + } } =20 extern void dump_ioapic_irq_info(void); Index: 2008-09-19/xen/arch/x86/msi.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/arch/x86/msi.c 2008-08-15 16:18:55.000000000 = +0200 +++ 2008-09-19/xen/arch/x86/msi.c 2008-09-24 09:19:47.000000000 = +0200 @@ -727,7 +727,6 @@ void pci_disable_msi(int vector) __pci_disable_msix(vector); } =20 -extern struct hw_interrupt_type pci_msi_type; static void msi_free_vectors(struct pci_dev* dev) { struct msi_desc *entry, *tmp; Index: 2008-09-19/xen/arch/x86/physdev.c =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/arch/x86/physdev.c 2008-09-24 09:17:33.0000000= 00 +0200 +++ 2008-09-19/xen/arch/x86/physdev.c 2008-09-24 09:19:57.000000000 = +0200 @@ -27,8 +27,6 @@ ioapic_guest_write( unsigned long physbase, unsigned int reg, u32 pval); =20 =20 -extern struct hw_interrupt_type pci_msi_type; - static int get_free_pirq(struct domain *d, int type, int index) { int i; @@ -150,7 +148,7 @@ static int unmap_domain_pirq(struct doma =20 vector =3D d->arch.pirq_vector[pirq]; =20 - if ( !vector ) + if ( vector <=3D 0 ) { gdprintk(XENLOG_G_ERR, "domain %X: pirq %x not mapped still\n", d->domain_id, pirq); @@ -160,18 +158,30 @@ static int unmap_domain_pirq(struct doma =20 desc =3D &irq_desc[vector]; spin_lock_irqsave(&desc->lock, flags); + + if ( desc->status & IRQ_GUEST ) + { + dprintk(XENLOG_G_WARNING, "dom%d: forcing unbind of pirq %d\n", + d->domain_id, pirq); + __pirq_guest_unbind(d, pirq, vector, flags); + vector =3D -vector; + } + if ( desc->msi_desc ) pci_disable_msi(vector); =20 if ( desc->handler =3D=3D &pci_msi_type ) - { - /* MSI is not shared, so should be released already */ - BUG_ON(desc->status & IRQ_GUEST); - irq_desc[vector].handler =3D &no_irq_type; - } + desc->handler =3D &no_irq_type; + spin_unlock_irqrestore(&desc->lock, flags); =20 - d->arch.pirq_vector[pirq] =3D d->arch.vector_pirq[vector] =3D 0; + if ( vector > 0 ) + d->arch.pirq_vector[pirq] =3D d->arch.vector_pirq[vector] =3D 0; + else + { + d->arch.pirq_vector[pirq] =3D vector; + d->arch.vector_pirq[-vector] =3D -pirq; + } =20 ret =3D irq_deny_access(d, pirq); if ( ret ) @@ -244,7 +254,7 @@ static int physdev_map_pirq(struct physd } =20 spin_lock_irqsave(&d->arch.irq_lock, flags); - if ( map->pirq =3D=3D -1 ) + if ( map->pirq < 0 ) { if ( d->arch.vector_pirq[vector] ) { @@ -252,6 +262,11 @@ static int physdev_map_pirq(struct physd map->index, map->pirq, d->arch.vector_pirq[vector]); pirq =3D d->arch.vector_pirq[vector]; + if ( pirq < 0 ) + { + ret =3D -EBUSY; + goto done; + } } else { Index: 2008-09-19/xen/include/asm-x86/irq.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/include/asm-x86/irq.h 2008-09-24 09:17:33.0000000= 00 +0200 +++ 2008-09-19/xen/include/asm-x86/irq.h 2008-09-23 15:25:53.0000000= 00 +0200 @@ -52,6 +52,10 @@ extern atomic_t irq_mis_count; int pirq_acktype(struct domain *d, int irq); int pirq_shared(struct domain *d , int irq); =20 +/* May only be called be irq_desc[vector].lock held. */ +void __pirq_guest_unbind(struct domain *d, int irq, unsigned int vector, + unsigned long flags); + extern int domain_irq_to_vector(struct domain *d, int irq); extern int domain_vector_to_irq(struct domain *d, int vector); #endif /* _ASM_HW_IRQ_H */ Index: 2008-09-19/xen/include/asm-x86/msi.h =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D --- 2008-09-19.orig/xen/include/asm-x86/msi.h 2008-08-15 16:18:55.0000000= 00 +0200 +++ 2008-09-19/xen/include/asm-x86/msi.h 2008-09-24 09:21:44.0000000= 00 +0200 @@ -106,7 +106,7 @@ struct msi_desc { */ #define NR_HP_RESERVED_VECTORS 20 =20 -extern int vector_irq[NR_VECTORS]; +extern struct hw_interrupt_type pci_msi_type; =20 /* * MSI-X Address Register