* [PATCH] IRQ: manually EOI migrating line interrupts
@ 2011-08-30 14:17 Andrew Cooper
2011-08-30 14:19 ` Andrew Cooper
` (2 more replies)
0 siblings, 3 replies; 13+ messages in thread
From: Andrew Cooper @ 2011-08-30 14:17 UTC (permalink / raw)
To: xen-devel; +Cc: Andrew Cooper
When migrating IO-APIC line level interrupts between PCPUs, the
migration code rewrites the IO-APIC entry to point to the new
CPU/Vector before EOI'ing it.
The EOI process says that EOI'ing the Local APIC will cause a
broadcast with the vector number, which the IO-APIC must listen to to
clear the IRR and Status bits.
In the case of migrating, the IO-APIC has already been
reprogrammed so the EOI broadcast with the old vector fails to match
the new vector, leaving the IO-APIC with an outstanding vector,
preventing any more use of that line interrupt. This causes a lockup
especially when your root device is using PCI INTA (megaraid_sas
driver *ehem*)
However, the problem is mostly hidden because send_cleanup_vector()
causes a cleanup of all moving vectors on the current PCPU in such a
way which does not cause the problem, and if the problem has occured,
the writes it makes to the IO-APIC clears the IRR and Status bits
which unlocks the problem.
This fix is distinctly a temporary hack, waiting on a cleanup of the
irq code. It checks for the edge case where we have moved the irq,
and manually EOI's the old vector with the IO-APIC which correctly
clears the IRR and Status bits. Also, it protects the code which
updates irq_cfg by disabling interrupts.
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/hpet.c
--- a/xen/arch/x86/hpet.c Thu Aug 25 12:03:14 2011 +0100
+++ b/xen/arch/x86/hpet.c Tue Aug 30 15:15:56 2011 +0100
@@ -301,7 +301,7 @@ static void hpet_msi_ack(unsigned int ir
ack_APIC_irq();
}
-static void hpet_msi_end(unsigned int irq)
+static void hpet_msi_end(unsigned int irq, u8 vector)
{
}
diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/i8259.c
--- a/xen/arch/x86/i8259.c Thu Aug 25 12:03:14 2011 +0100
+++ b/xen/arch/x86/i8259.c Tue Aug 30 15:15:56 2011 +0100
@@ -93,7 +93,7 @@ static unsigned int startup_8259A_irq(un
return 0; /* never anything pending */
}
-static void end_8259A_irq(unsigned int irq)
+static void end_8259A_irq(unsigned int irq, u8 vector)
{
if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
enable_8259A_irq(irq);
diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/io_apic.c
--- a/xen/arch/x86/io_apic.c Thu Aug 25 12:03:14 2011 +0100
+++ b/xen/arch/x86/io_apic.c Tue Aug 30 15:15:56 2011 +0100
@@ -1690,7 +1690,7 @@ static void mask_and_ack_level_ioapic_ir
}
}
-static void end_level_ioapic_irq (unsigned int irq)
+static void end_level_ioapic_irq (unsigned int irq, u8 vector)
{
unsigned long v;
int i;
@@ -1739,6 +1739,14 @@ static void end_level_ioapic_irq (unsign
*/
i = IO_APIC_VECTOR(irq);
+ /* Manually EOI the old vector if we are moving to the new */
+ if ( vector && i != vector )
+ {
+ int ioapic;
+ for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
+ io_apic_eoi(ioapic, i);
+ }
+
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
ack_APIC_irq();
@@ -1762,7 +1770,10 @@ static void disable_edge_ioapic_irq(unsi
{
}
-#define end_edge_ioapic_irq disable_edge_ioapic_irq
+static void end_edge_ioapic_irq(unsigned int irq, u8 vector)
+{
+}
+
/*
* Level and edge triggered IO-APIC interrupts need different handling,
@@ -1811,7 +1822,7 @@ static void ack_msi_irq(unsigned int irq
ack_APIC_irq(); /* ACKTYPE_NONE */
}
-static void end_msi_irq(unsigned int irq)
+static void end_msi_irq(unsigned int irq, u8 vector)
{
if ( !msi_maskable_irq(irq_desc[irq].msi_desc) )
ack_APIC_irq(); /* ACKTYPE_EOI */
diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/irq.c
--- a/xen/arch/x86/irq.c Thu Aug 25 12:03:14 2011 +0100
+++ b/xen/arch/x86/irq.c Tue Aug 30 15:15:56 2011 +0100
@@ -345,6 +345,7 @@ static void __do_IRQ_guest(int vector);
void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }
static void enable_none(unsigned int vector) { }
+static void end_none(unsigned int irq, u8 vector) { }
static unsigned int startup_none(unsigned int vector) { return 0; }
static void disable_none(unsigned int vector) { }
static void ack_none(unsigned int irq)
@@ -353,7 +354,6 @@ static void ack_none(unsigned int irq)
}
#define shutdown_none disable_none
-#define end_none enable_none
hw_irq_controller no_irq_type = {
"none",
@@ -381,6 +381,7 @@ int __assign_irq_vector(int irq, struct
static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
unsigned int old_vector;
int cpu, err;
+ unsigned long flags;
cpumask_t tmp_mask;
old_vector = irq_to_vector(irq);
@@ -431,6 +432,7 @@ next:
/* Found one! */
current_vector = vector;
current_offset = offset;
+ local_irq_save(flags);
if (old_vector) {
cfg->move_in_progress = 1;
cpus_copy(cfg->old_cpu_mask, cfg->cpu_mask);
@@ -450,6 +452,7 @@ next:
if (IO_APIC_IRQ(irq))
irq_vector[irq] = vector;
err = 0;
+ local_irq_restore(flags);
break;
}
return err;
@@ -657,7 +660,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
desc->status &= ~IRQ_INPROGRESS;
out:
- desc->handler->end(irq);
+ desc->handler->end(irq, regs->entry_vector);
out_no_end:
spin_unlock(&desc->lock);
irq_exit();
@@ -857,7 +860,7 @@ static void irq_guest_eoi_timer_fn(void
switch ( action->ack_type )
{
case ACKTYPE_UNMASK:
- desc->handler->end(irq);
+ desc->handler->end(irq, 0);
break;
case ACKTYPE_EOI:
cpu_eoi_map = action->cpu_eoi_map;
@@ -885,7 +888,7 @@ static void __do_IRQ_guest(int irq)
/* An interrupt may slip through while freeing an ACKTYPE_EOI irq. */
ASSERT(action->ack_type == ACKTYPE_EOI);
ASSERT(desc->status & IRQ_DISABLED);
- desc->handler->end(irq);
+ desc->handler->end(irq, vector);
return;
}
@@ -1099,7 +1102,7 @@ static void flush_ready_eoi(void)
ASSERT(irq > 0);
desc = irq_to_desc(irq);
spin_lock(&desc->lock);
- desc->handler->end(irq);
+ desc->handler->end(irq, peoi[sp].vector);
spin_unlock(&desc->lock);
}
@@ -1177,7 +1180,7 @@ void desc_guest_eoi(struct irq_desc *des
if ( action->ack_type == ACKTYPE_UNMASK )
{
ASSERT(cpus_empty(action->cpu_eoi_map));
- desc->handler->end(irq);
+ desc->handler->end(irq, 0);
spin_unlock_irq(&desc->lock);
return;
}
@@ -1431,7 +1434,7 @@ static irq_guest_action_t *__pirq_guest_
case ACKTYPE_UNMASK:
if ( test_and_clear_bool(pirq->masked) &&
(--action->in_flight == 0) )
- desc->handler->end(irq);
+ desc->handler->end(irq, 0);
break;
case ACKTYPE_EOI:
/* NB. If #guests == 0 then we clear the eoi_map later on. */
diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/amd/iommu_init.c
--- a/xen/drivers/passthrough/amd/iommu_init.c Thu Aug 25 12:03:14 2011 +0100
+++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Aug 30 15:15:56 2011 +0100
@@ -441,7 +441,7 @@ static unsigned int iommu_msi_startup(un
return 0;
}
-static void iommu_msi_end(unsigned int irq)
+static void iommu_msi_end(unsigned int irq, u8 vector)
{
iommu_msi_unmask(irq);
ack_APIC_irq();
diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c Thu Aug 25 12:03:14 2011 +0100
+++ b/xen/drivers/passthrough/vtd/iommu.c Tue Aug 30 15:15:56 2011 +0100
@@ -971,7 +971,7 @@ static unsigned int dma_msi_startup(unsi
return 0;
}
-static void dma_msi_end(unsigned int irq)
+static void dma_msi_end(unsigned int irq, u8 vector)
{
dma_msi_unmask(irq);
ack_APIC_irq();
diff -r 227130622561 -r a95fd5d03c20 xen/include/xen/irq.h
--- a/xen/include/xen/irq.h Thu Aug 25 12:03:14 2011 +0100
+++ b/xen/include/xen/irq.h Tue Aug 30 15:15:56 2011 +0100
@@ -44,7 +44,7 @@ struct hw_interrupt_type {
void (*enable)(unsigned int irq);
void (*disable)(unsigned int irq);
void (*ack)(unsigned int irq);
- void (*end)(unsigned int irq);
+ void (*end)(unsigned int irq, u8 vector);
void (*set_affinity)(unsigned int irq, cpumask_t mask);
};
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-08-30 14:17 [PATCH] IRQ: manually EOI migrating line interrupts Andrew Cooper
@ 2011-08-30 14:19 ` Andrew Cooper
2011-08-30 14:28 ` Andrew Cooper
2011-09-05 10:43 ` Jan Beulich
2 siblings, 0 replies; 13+ messages in thread
From: Andrew Cooper @ 2011-08-30 14:19 UTC (permalink / raw)
To: xen-devel@lists.xensource.com, Keir Fraser
On 30/08/11 15:17, Andrew Cooper wrote:
> --- SNIP ---
>
This is a candidate for backport to Xen-4.1 and Xen-4.0
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-08-30 14:17 [PATCH] IRQ: manually EOI migrating line interrupts Andrew Cooper
2011-08-30 14:19 ` Andrew Cooper
@ 2011-08-30 14:28 ` Andrew Cooper
2011-08-30 14:35 ` Keir Fraser
2011-08-30 14:38 ` Keir Fraser
2011-09-05 10:43 ` Jan Beulich
2 siblings, 2 replies; 13+ messages in thread
From: Andrew Cooper @ 2011-08-30 14:28 UTC (permalink / raw)
To: xen-devel@lists.xensource.com, Keir Fraser
On 30/08/11 15:17, Andrew Cooper wrote:
> When migrating IO-APIC line level interrupts between PCPUs, the
> migration code rewrites the IO-APIC entry to point to the new
> CPU/Vector before EOI'ing it.
>
> The EOI process says that EOI'ing the Local APIC will cause a
> broadcast with the vector number, which the IO-APIC must listen to to
> clear the IRR and Status bits.
>
> In the case of migrating, the IO-APIC has already been
> reprogrammed so the EOI broadcast with the old vector fails to match
> the new vector, leaving the IO-APIC with an outstanding vector,
> preventing any more use of that line interrupt. This causes a lockup
> especially when your root device is using PCI INTA (megaraid_sas
> driver *ehem*)
>
> However, the problem is mostly hidden because send_cleanup_vector()
> causes a cleanup of all moving vectors on the current PCPU in such a
> way which does not cause the problem, and if the problem has occured,
> the writes it makes to the IO-APIC clears the IRR and Status bits
> which unlocks the problem.
>
>
> This fix is distinctly a temporary hack, waiting on a cleanup of the
> irq code. It checks for the edge case where we have moved the irq,
> and manually EOI's the old vector with the IO-APIC which correctly
> clears the IRR and Status bits. Also, it protects the code which
> updates irq_cfg by disabling interrupts.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
P.S.
Because this fix actually works, it means that EOI'ing the IO-APIC is
keyed only on vector, and not the target CPU. As a result, having two
different interrupts in the IO-APIC with the same vector will result in
problems when trying to EOI one of them.
I will address this problem as well in my IRQ cleanup, as I cant see any
checks for it currently.
~Andrew
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/hpet.c
> --- a/xen/arch/x86/hpet.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/hpet.c Tue Aug 30 15:15:56 2011 +0100
> @@ -301,7 +301,7 @@ static void hpet_msi_ack(unsigned int ir
> ack_APIC_irq();
> }
>
> -static void hpet_msi_end(unsigned int irq)
> +static void hpet_msi_end(unsigned int irq, u8 vector)
> {
> }
>
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/i8259.c
> --- a/xen/arch/x86/i8259.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/i8259.c Tue Aug 30 15:15:56 2011 +0100
> @@ -93,7 +93,7 @@ static unsigned int startup_8259A_irq(un
> return 0; /* never anything pending */
> }
>
> -static void end_8259A_irq(unsigned int irq)
> +static void end_8259A_irq(unsigned int irq, u8 vector)
> {
> if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
> enable_8259A_irq(irq);
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/io_apic.c
> --- a/xen/arch/x86/io_apic.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/io_apic.c Tue Aug 30 15:15:56 2011 +0100
> @@ -1690,7 +1690,7 @@ static void mask_and_ack_level_ioapic_ir
> }
> }
>
> -static void end_level_ioapic_irq (unsigned int irq)
> +static void end_level_ioapic_irq (unsigned int irq, u8 vector)
> {
> unsigned long v;
> int i;
> @@ -1739,6 +1739,14 @@ static void end_level_ioapic_irq (unsign
> */
> i = IO_APIC_VECTOR(irq);
>
> + /* Manually EOI the old vector if we are moving to the new */
> + if ( vector && i != vector )
> + {
> + int ioapic;
> + for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
> + io_apic_eoi(ioapic, i);
> + }
> +
> v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
>
> ack_APIC_irq();
> @@ -1762,7 +1770,10 @@ static void disable_edge_ioapic_irq(unsi
> {
> }
>
> -#define end_edge_ioapic_irq disable_edge_ioapic_irq
> +static void end_edge_ioapic_irq(unsigned int irq, u8 vector)
> +{
> +}
> +
>
> /*
> * Level and edge triggered IO-APIC interrupts need different handling,
> @@ -1811,7 +1822,7 @@ static void ack_msi_irq(unsigned int irq
> ack_APIC_irq(); /* ACKTYPE_NONE */
> }
>
> -static void end_msi_irq(unsigned int irq)
> +static void end_msi_irq(unsigned int irq, u8 vector)
> {
> if ( !msi_maskable_irq(irq_desc[irq].msi_desc) )
> ack_APIC_irq(); /* ACKTYPE_EOI */
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/irq.c
> --- a/xen/arch/x86/irq.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/irq.c Tue Aug 30 15:15:56 2011 +0100
> @@ -345,6 +345,7 @@ static void __do_IRQ_guest(int vector);
> void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }
>
> static void enable_none(unsigned int vector) { }
> +static void end_none(unsigned int irq, u8 vector) { }
> static unsigned int startup_none(unsigned int vector) { return 0; }
> static void disable_none(unsigned int vector) { }
> static void ack_none(unsigned int irq)
> @@ -353,7 +354,6 @@ static void ack_none(unsigned int irq)
> }
>
> #define shutdown_none disable_none
> -#define end_none enable_none
>
> hw_irq_controller no_irq_type = {
> "none",
> @@ -381,6 +381,7 @@ int __assign_irq_vector(int irq, struct
> static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
> unsigned int old_vector;
> int cpu, err;
> + unsigned long flags;
> cpumask_t tmp_mask;
>
> old_vector = irq_to_vector(irq);
> @@ -431,6 +432,7 @@ next:
> /* Found one! */
> current_vector = vector;
> current_offset = offset;
> + local_irq_save(flags);
> if (old_vector) {
> cfg->move_in_progress = 1;
> cpus_copy(cfg->old_cpu_mask, cfg->cpu_mask);
> @@ -450,6 +452,7 @@ next:
> if (IO_APIC_IRQ(irq))
> irq_vector[irq] = vector;
> err = 0;
> + local_irq_restore(flags);
> break;
> }
> return err;
> @@ -657,7 +660,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
> desc->status &= ~IRQ_INPROGRESS;
>
> out:
> - desc->handler->end(irq);
> + desc->handler->end(irq, regs->entry_vector);
> out_no_end:
> spin_unlock(&desc->lock);
> irq_exit();
> @@ -857,7 +860,7 @@ static void irq_guest_eoi_timer_fn(void
> switch ( action->ack_type )
> {
> case ACKTYPE_UNMASK:
> - desc->handler->end(irq);
> + desc->handler->end(irq, 0);
> break;
> case ACKTYPE_EOI:
> cpu_eoi_map = action->cpu_eoi_map;
> @@ -885,7 +888,7 @@ static void __do_IRQ_guest(int irq)
> /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. */
> ASSERT(action->ack_type == ACKTYPE_EOI);
> ASSERT(desc->status & IRQ_DISABLED);
> - desc->handler->end(irq);
> + desc->handler->end(irq, vector);
> return;
> }
>
> @@ -1099,7 +1102,7 @@ static void flush_ready_eoi(void)
> ASSERT(irq > 0);
> desc = irq_to_desc(irq);
> spin_lock(&desc->lock);
> - desc->handler->end(irq);
> + desc->handler->end(irq, peoi[sp].vector);
> spin_unlock(&desc->lock);
> }
>
> @@ -1177,7 +1180,7 @@ void desc_guest_eoi(struct irq_desc *des
> if ( action->ack_type == ACKTYPE_UNMASK )
> {
> ASSERT(cpus_empty(action->cpu_eoi_map));
> - desc->handler->end(irq);
> + desc->handler->end(irq, 0);
> spin_unlock_irq(&desc->lock);
> return;
> }
> @@ -1431,7 +1434,7 @@ static irq_guest_action_t *__pirq_guest_
> case ACKTYPE_UNMASK:
> if ( test_and_clear_bool(pirq->masked) &&
> (--action->in_flight == 0) )
> - desc->handler->end(irq);
> + desc->handler->end(irq, 0);
> break;
> case ACKTYPE_EOI:
> /* NB. If #guests == 0 then we clear the eoi_map later on. */
> diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/amd/iommu_init.c
> --- a/xen/drivers/passthrough/amd/iommu_init.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Aug 30 15:15:56 2011 +0100
> @@ -441,7 +441,7 @@ static unsigned int iommu_msi_startup(un
> return 0;
> }
>
> -static void iommu_msi_end(unsigned int irq)
> +static void iommu_msi_end(unsigned int irq, u8 vector)
> {
> iommu_msi_unmask(irq);
> ack_APIC_irq();
> diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/vtd/iommu.c
> --- a/xen/drivers/passthrough/vtd/iommu.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/drivers/passthrough/vtd/iommu.c Tue Aug 30 15:15:56 2011 +0100
> @@ -971,7 +971,7 @@ static unsigned int dma_msi_startup(unsi
> return 0;
> }
>
> -static void dma_msi_end(unsigned int irq)
> +static void dma_msi_end(unsigned int irq, u8 vector)
> {
> dma_msi_unmask(irq);
> ack_APIC_irq();
> diff -r 227130622561 -r a95fd5d03c20 xen/include/xen/irq.h
> --- a/xen/include/xen/irq.h Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/include/xen/irq.h Tue Aug 30 15:15:56 2011 +0100
> @@ -44,7 +44,7 @@ struct hw_interrupt_type {
> void (*enable)(unsigned int irq);
> void (*disable)(unsigned int irq);
> void (*ack)(unsigned int irq);
> - void (*end)(unsigned int irq);
> + void (*end)(unsigned int irq, u8 vector);
> void (*set_affinity)(unsigned int irq, cpumask_t mask);
> };
>
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-08-30 14:28 ` Andrew Cooper
@ 2011-08-30 14:35 ` Keir Fraser
2011-08-30 15:03 ` Andrew Cooper
2011-08-30 14:38 ` Keir Fraser
1 sibling, 1 reply; 13+ messages in thread
From: Keir Fraser @ 2011-08-30 14:35 UTC (permalink / raw)
To: Andrew Cooper, xen-devel@lists.xensource.com
On 30/08/2011 15:28, "Andrew Cooper" <andrew.cooper3@citrix.com> wrote:
> P.S.
>
> Because this fix actually works, it means that EOI'ing the IO-APIC is
> keyed only on vector, and not the target CPU. As a result, having two
> different interrupts in the IO-APIC with the same vector will result in
> problems when trying to EOI one of them.
>
> I will address this problem as well in my IRQ cleanup, as I cant see any
> checks for it currently.
Isn't this a general issue with per-cpu vector allocations anyway? The usual
irq-completion path of LAPIC EOI -> IO-APIC EOI broadcast will unavoidably
have this issue. Every irq line on every IO-APIC currently programmed with
that vector (regardless of target CPU) will get EOIed/unmasked. Not clear
it's really a problem though! It might cause the odd spurious interrupt
perhaps?
-- Keir
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-08-30 14:35 ` Keir Fraser
@ 2011-08-30 15:03 ` Andrew Cooper
0 siblings, 0 replies; 13+ messages in thread
From: Andrew Cooper @ 2011-08-30 15:03 UTC (permalink / raw)
To: Keir Fraser; +Cc: xen-devel@lists.xensource.com
On 30/08/11 15:35, Keir Fraser wrote:
> On 30/08/2011 15:28, "Andrew Cooper" <andrew.cooper3@citrix.com> wrote:
>
>> P.S.
>>
>> Because this fix actually works, it means that EOI'ing the IO-APIC is
>> keyed only on vector, and not the target CPU. As a result, having two
>> different interrupts in the IO-APIC with the same vector will result in
>> problems when trying to EOI one of them.
>>
>> I will address this problem as well in my IRQ cleanup, as I cant see any
>> checks for it currently.
> Isn't this a general issue with per-cpu vector allocations anyway? The usual
> irq-completion path of LAPIC EOI -> IO-APIC EOI broadcast will unavoidably
> have this issue. Every irq line on every IO-APIC currently programmed with
> that vector (regardless of target CPU) will get EOIed/unmasked. Not clear
> it's really a problem though! It might cause the odd spurious interrupt
> perhaps?
>
> -- Keir
The manual (Section 10.8.5) says "If the terminated interrupt was a
level-triggered interrupt, the local APIC also sends an end-of-interrupt
message to all I/O APICs"
So the EOI broadcast only happens if the TMR bit say that the interrupt
was line level, which should make the code fine.
Alternatively, you can explicitly disable the EOI broadcast (if
supported - bit 24 of the Local APIC Version Register) by setting bit 12
of the SPIV Register, and manually writing to the IO-APIC EOI register.
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-08-30 14:28 ` Andrew Cooper
2011-08-30 14:35 ` Keir Fraser
@ 2011-08-30 14:38 ` Keir Fraser
2011-08-30 15:19 ` Andrew Cooper
1 sibling, 1 reply; 13+ messages in thread
From: Keir Fraser @ 2011-08-30 14:38 UTC (permalink / raw)
To: Andrew Cooper, xen-devel@lists.xensource.com
On 30/08/2011 15:28, "Andrew Cooper" <andrew.cooper3@citrix.com> wrote:
>> @@ -1739,6 +1739,14 @@ static void end_level_ioapic_irq (unsign
>> */
>> i = IO_APIC_VECTOR(irq);
>>
>> + /* Manually EOI the old vector if we are moving to the new */
>> + if ( vector && i != vector )
>> + {
>> + int ioapic;
>> + for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
>> + io_apic_eoi(ioapic, i);
>> + }
>> +
I don't know whether it's worth the effort, but we ought to be able to do
better than this and send EOI to exactly the correct IO-APIC. I think
irq=gsi here? And we should know the gsi_base of every IO-APIC, so we can
work out in fact which pin of which IO-APIC needs clobbering?
-- Keir
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-08-30 14:38 ` Keir Fraser
@ 2011-08-30 15:19 ` Andrew Cooper
2011-08-30 16:04 ` Keir Fraser
2011-09-05 10:50 ` Jan Beulich
0 siblings, 2 replies; 13+ messages in thread
From: Andrew Cooper @ 2011-08-30 15:19 UTC (permalink / raw)
To: Keir Fraser; +Cc: xen-devel@lists.xensource.com
On 30/08/11 15:38, Keir Fraser wrote:
> On 30/08/2011 15:28, "Andrew Cooper" <andrew.cooper3@citrix.com> wrote:
>
>>> @@ -1739,6 +1739,14 @@ static void end_level_ioapic_irq (unsign
>>> */
>>> i = IO_APIC_VECTOR(irq);
>>>
>>> + /* Manually EOI the old vector if we are moving to the new */
>>> + if ( vector && i != vector )
>>> + {
>>> + int ioapic;
>>> + for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
>>> + io_apic_eoi(ioapic, i);
>>> + }
>>> +
> I don't know whether it's worth the effort, but we ought to be able to do
> better than this and send EOI to exactly the correct IO-APIC. I think
> irq=gsi here? And we should know the gsi_base of every IO-APIC, so we can
> work out in fact which pin of which IO-APIC needs clobbering?
>
> -- Keir
irq does (or really should) equal gsi. I had not noticed gsi_base and
gsi_end when making this fix.
io_apic_eoi does not require a pin, but it is using IO-APIC registers
which I can not find references to. The Local APIC document implies
that you just write the vector to the EOI register, and the IO-APIC will
work out which pin to clear.
However, because Xen currently might assign the same vector to two pins
in the same IO-APIC, changing the code at this point will not fix the
problem - just make it rarer. Therefore, I would suggest that it is not
worth the effort, as the problem is already very rare, and unlikely to
be a problem with any sane hardware which avoids PCI INTx interrupts
where possible.
P.S. If anyone knows which manual contains the specificaion/programming
guide for the IO-APIC, I would be very gratefull. Google always points
to 82093AA datasheet which is very out of date.
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-08-30 15:19 ` Andrew Cooper
@ 2011-08-30 16:04 ` Keir Fraser
2011-09-05 10:50 ` Jan Beulich
1 sibling, 0 replies; 13+ messages in thread
From: Keir Fraser @ 2011-08-30 16:04 UTC (permalink / raw)
To: Andrew Cooper; +Cc: xen-devel@lists.xensource.com
On 30/08/2011 16:19, "Andrew Cooper" <andrew.cooper3@citrix.com> wrote:
> P.S. If anyone knows which manual contains the specificaion/programming
> guide for the IO-APIC, I would be very gratefull. Google always points
> to 82093AA datasheet which is very out of date.
If you find something better/newer then I'm interested too. The 82093AA
datasheet is what I've been going on so far.
-- Keir
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-08-30 15:19 ` Andrew Cooper
2011-08-30 16:04 ` Keir Fraser
@ 2011-09-05 10:50 ` Jan Beulich
2011-09-05 12:24 ` Andrew Cooper
1 sibling, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2011-09-05 10:50 UTC (permalink / raw)
To: Andrew Cooper, Keir Fraser; +Cc: xen-devel@lists.xensource.com
>>> On 30.08.11 at 17:19, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> P.S. If anyone knows which manual contains the specificaion/programming
> guide for the IO-APIC, I would be very gratefull. Google always points
> to 82093AA datasheet which is very out of date.
Intel's ICH documentation (e.g. document number 319973-003) has a
section on the IO-APIC (among the other LPC stuff), which covers the
EOI register and possibly other newer additions.
Jan
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-09-05 10:50 ` Jan Beulich
@ 2011-09-05 12:24 ` Andrew Cooper
0 siblings, 0 replies; 13+ messages in thread
From: Andrew Cooper @ 2011-09-05 12:24 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel@lists.xensource.com, Keir Fraser
On 05/09/11 11:50, Jan Beulich wrote:
>>>> On 30.08.11 at 17:19, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> P.S. If anyone knows which manual contains the specificaion/programming
>> guide for the IO-APIC, I would be very gratefull. Google always points
>> to 82093AA datasheet which is very out of date.
> Intel's ICH documentation (e.g. document number 319973-003) has a
> section on the IO-APIC (among the other LPC stuff), which covers the
> EOI register and possibly other newer additions.
>
> Jan
Fantastic - Thanks
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-08-30 14:17 [PATCH] IRQ: manually EOI migrating line interrupts Andrew Cooper
2011-08-30 14:19 ` Andrew Cooper
2011-08-30 14:28 ` Andrew Cooper
@ 2011-09-05 10:43 ` Jan Beulich
2011-09-05 12:29 ` Andrew Cooper
2 siblings, 1 reply; 13+ messages in thread
From: Jan Beulich @ 2011-09-05 10:43 UTC (permalink / raw)
To: Andrew Cooper; +Cc: xen-devel
>>> On 30.08.11 at 16:17, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
> When migrating IO-APIC line level interrupts between PCPUs, the
> migration code rewrites the IO-APIC entry to point to the new
> CPU/Vector before EOI'ing it.
>
> The EOI process says that EOI'ing the Local APIC will cause a
> broadcast with the vector number, which the IO-APIC must listen to to
> clear the IRR and Status bits.
>
> In the case of migrating, the IO-APIC has already been
> reprogrammed so the EOI broadcast with the old vector fails to match
> the new vector, leaving the IO-APIC with an outstanding vector,
> preventing any more use of that line interrupt. This causes a lockup
> especially when your root device is using PCI INTA (megaraid_sas
> driver *ehem*)
>
> However, the problem is mostly hidden because send_cleanup_vector()
> causes a cleanup of all moving vectors on the current PCPU in such a
> way which does not cause the problem, and if the problem has occured,
> the writes it makes to the IO-APIC clears the IRR and Status bits
> which unlocks the problem.
>
>
> This fix is distinctly a temporary hack, waiting on a cleanup of the
> irq code. It checks for the edge case where we have moved the irq,
> and manually EOI's the old vector with the IO-APIC which correctly
> clears the IRR and Status bits. Also, it protects the code which
> updates irq_cfg by disabling interrupts.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/hpet.c
> --- a/xen/arch/x86/hpet.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/hpet.c Tue Aug 30 15:15:56 2011 +0100
> @@ -301,7 +301,7 @@ static void hpet_msi_ack(unsigned int ir
> ack_APIC_irq();
> }
>
> -static void hpet_msi_end(unsigned int irq)
> +static void hpet_msi_end(unsigned int irq, u8 vector)
> {
> }
>
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/i8259.c
> --- a/xen/arch/x86/i8259.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/i8259.c Tue Aug 30 15:15:56 2011 +0100
> @@ -93,7 +93,7 @@ static unsigned int startup_8259A_irq(un
> return 0; /* never anything pending */
> }
>
> -static void end_8259A_irq(unsigned int irq)
> +static void end_8259A_irq(unsigned int irq, u8 vector)
> {
> if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
> enable_8259A_irq(irq);
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/io_apic.c
> --- a/xen/arch/x86/io_apic.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/io_apic.c Tue Aug 30 15:15:56 2011 +0100
> @@ -1690,7 +1690,7 @@ static void mask_and_ack_level_ioapic_ir
> }
> }
>
> -static void end_level_ioapic_irq (unsigned int irq)
> +static void end_level_ioapic_irq (unsigned int irq, u8 vector)
> {
> unsigned long v;
> int i;
> @@ -1739,6 +1739,14 @@ static void end_level_ioapic_irq (unsign
> */
> i = IO_APIC_VECTOR(irq);
>
> + /* Manually EOI the old vector if we are moving to the new */
> + if ( vector && i != vector )
> + {
> + int ioapic;
> + for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
> + io_apic_eoi(ioapic, i);
While I realize that the patch already went in, this still will need
adjustment for dealing with old IO-APICs that don't have an EOI
register (or if we want to stop supporting such, a clear panic()
rather than subtle and hard to debug failure).
Jan
> + }
> +
> v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
>
> ack_APIC_irq();
> @@ -1762,7 +1770,10 @@ static void disable_edge_ioapic_irq(unsi
> {
> }
>
> -#define end_edge_ioapic_irq disable_edge_ioapic_irq
> +static void end_edge_ioapic_irq(unsigned int irq, u8 vector)
> +{
> +}
> +
>
> /*
> * Level and edge triggered IO-APIC interrupts need different handling,
> @@ -1811,7 +1822,7 @@ static void ack_msi_irq(unsigned int irq
> ack_APIC_irq(); /* ACKTYPE_NONE */
> }
>
> -static void end_msi_irq(unsigned int irq)
> +static void end_msi_irq(unsigned int irq, u8 vector)
> {
> if ( !msi_maskable_irq(irq_desc[irq].msi_desc) )
> ack_APIC_irq(); /* ACKTYPE_EOI */
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/irq.c
> --- a/xen/arch/x86/irq.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/irq.c Tue Aug 30 15:15:56 2011 +0100
> @@ -345,6 +345,7 @@ static void __do_IRQ_guest(int vector);
> void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }
>
> static void enable_none(unsigned int vector) { }
> +static void end_none(unsigned int irq, u8 vector) { }
> static unsigned int startup_none(unsigned int vector) { return 0; }
> static void disable_none(unsigned int vector) { }
> static void ack_none(unsigned int irq)
> @@ -353,7 +354,6 @@ static void ack_none(unsigned int irq)
> }
>
> #define shutdown_none disable_none
> -#define end_none enable_none
>
> hw_irq_controller no_irq_type = {
> "none",
> @@ -381,6 +381,7 @@ int __assign_irq_vector(int irq, struct
> static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
> unsigned int old_vector;
> int cpu, err;
> + unsigned long flags;
> cpumask_t tmp_mask;
>
> old_vector = irq_to_vector(irq);
> @@ -431,6 +432,7 @@ next:
> /* Found one! */
> current_vector = vector;
> current_offset = offset;
> + local_irq_save(flags);
> if (old_vector) {
> cfg->move_in_progress = 1;
> cpus_copy(cfg->old_cpu_mask, cfg->cpu_mask);
> @@ -450,6 +452,7 @@ next:
> if (IO_APIC_IRQ(irq))
> irq_vector[irq] = vector;
> err = 0;
> + local_irq_restore(flags);
> break;
> }
> return err;
> @@ -657,7 +660,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
> desc->status &= ~IRQ_INPROGRESS;
>
> out:
> - desc->handler->end(irq);
> + desc->handler->end(irq, regs->entry_vector);
> out_no_end:
> spin_unlock(&desc->lock);
> irq_exit();
> @@ -857,7 +860,7 @@ static void irq_guest_eoi_timer_fn(void
> switch ( action->ack_type )
> {
> case ACKTYPE_UNMASK:
> - desc->handler->end(irq);
> + desc->handler->end(irq, 0);
> break;
> case ACKTYPE_EOI:
> cpu_eoi_map = action->cpu_eoi_map;
> @@ -885,7 +888,7 @@ static void __do_IRQ_guest(int irq)
> /* An interrupt may slip through while freeing an ACKTYPE_EOI irq.
> */
> ASSERT(action->ack_type == ACKTYPE_EOI);
> ASSERT(desc->status & IRQ_DISABLED);
> - desc->handler->end(irq);
> + desc->handler->end(irq, vector);
> return;
> }
>
> @@ -1099,7 +1102,7 @@ static void flush_ready_eoi(void)
> ASSERT(irq > 0);
> desc = irq_to_desc(irq);
> spin_lock(&desc->lock);
> - desc->handler->end(irq);
> + desc->handler->end(irq, peoi[sp].vector);
> spin_unlock(&desc->lock);
> }
>
> @@ -1177,7 +1180,7 @@ void desc_guest_eoi(struct irq_desc *des
> if ( action->ack_type == ACKTYPE_UNMASK )
> {
> ASSERT(cpus_empty(action->cpu_eoi_map));
> - desc->handler->end(irq);
> + desc->handler->end(irq, 0);
> spin_unlock_irq(&desc->lock);
> return;
> }
> @@ -1431,7 +1434,7 @@ static irq_guest_action_t *__pirq_guest_
> case ACKTYPE_UNMASK:
> if ( test_and_clear_bool(pirq->masked) &&
> (--action->in_flight == 0) )
> - desc->handler->end(irq);
> + desc->handler->end(irq, 0);
> break;
> case ACKTYPE_EOI:
> /* NB. If #guests == 0 then we clear the eoi_map later on. */
> diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/amd/iommu_init.c
> --- a/xen/drivers/passthrough/amd/iommu_init.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Aug 30 15:15:56 2011 +0100
> @@ -441,7 +441,7 @@ static unsigned int iommu_msi_startup(un
> return 0;
> }
>
> -static void iommu_msi_end(unsigned int irq)
> +static void iommu_msi_end(unsigned int irq, u8 vector)
> {
> iommu_msi_unmask(irq);
> ack_APIC_irq();
> diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/vtd/iommu.c
> --- a/xen/drivers/passthrough/vtd/iommu.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/drivers/passthrough/vtd/iommu.c Tue Aug 30 15:15:56 2011 +0100
> @@ -971,7 +971,7 @@ static unsigned int dma_msi_startup(unsi
> return 0;
> }
>
> -static void dma_msi_end(unsigned int irq)
> +static void dma_msi_end(unsigned int irq, u8 vector)
> {
> dma_msi_unmask(irq);
> ack_APIC_irq();
> diff -r 227130622561 -r a95fd5d03c20 xen/include/xen/irq.h
> --- a/xen/include/xen/irq.h Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/include/xen/irq.h Tue Aug 30 15:15:56 2011 +0100
> @@ -44,7 +44,7 @@ struct hw_interrupt_type {
> void (*enable)(unsigned int irq);
> void (*disable)(unsigned int irq);
> void (*ack)(unsigned int irq);
> - void (*end)(unsigned int irq);
> + void (*end)(unsigned int irq, u8 vector);
> void (*set_affinity)(unsigned int irq, cpumask_t mask);
> };
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xensource.com
> http://lists.xensource.com/xen-devel
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-09-05 10:43 ` Jan Beulich
@ 2011-09-05 12:29 ` Andrew Cooper
2011-09-05 12:50 ` Jan Beulich
0 siblings, 1 reply; 13+ messages in thread
From: Andrew Cooper @ 2011-09-05 12:29 UTC (permalink / raw)
To: Jan Beulich; +Cc: xen-devel@lists.xensource.com
On 05/09/11 11:43, Jan Beulich wrote:
>>>> On 30.08.11 at 16:17, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>> When migrating IO-APIC line level interrupts between PCPUs, the
>> migration code rewrites the IO-APIC entry to point to the new
>> CPU/Vector before EOI'ing it.
>>
>> The EOI process says that EOI'ing the Local APIC will cause a
>> broadcast with the vector number, which the IO-APIC must listen to to
>> clear the IRR and Status bits.
>>
>> In the case of migrating, the IO-APIC has already been
>> reprogrammed so the EOI broadcast with the old vector fails to match
>> the new vector, leaving the IO-APIC with an outstanding vector,
>> preventing any more use of that line interrupt. This causes a lockup
>> especially when your root device is using PCI INTA (megaraid_sas
>> driver *ehem*)
>>
>> However, the problem is mostly hidden because send_cleanup_vector()
>> causes a cleanup of all moving vectors on the current PCPU in such a
>> way which does not cause the problem, and if the problem has occured,
>> the writes it makes to the IO-APIC clears the IRR and Status bits
>> which unlocks the problem.
>>
>>
>> This fix is distinctly a temporary hack, waiting on a cleanup of the
>> irq code. It checks for the edge case where we have moved the irq,
>> and manually EOI's the old vector with the IO-APIC which correctly
>> clears the IRR and Status bits. Also, it protects the code which
>> updates irq_cfg by disabling interrupts.
>>
>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>
>> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/hpet.c
>> --- a/xen/arch/x86/hpet.c Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/arch/x86/hpet.c Tue Aug 30 15:15:56 2011 +0100
>> @@ -301,7 +301,7 @@ static void hpet_msi_ack(unsigned int ir
>> ack_APIC_irq();
>> }
>>
>> -static void hpet_msi_end(unsigned int irq)
>> +static void hpet_msi_end(unsigned int irq, u8 vector)
>> {
>> }
>>
>> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/i8259.c
>> --- a/xen/arch/x86/i8259.c Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/arch/x86/i8259.c Tue Aug 30 15:15:56 2011 +0100
>> @@ -93,7 +93,7 @@ static unsigned int startup_8259A_irq(un
>> return 0; /* never anything pending */
>> }
>>
>> -static void end_8259A_irq(unsigned int irq)
>> +static void end_8259A_irq(unsigned int irq, u8 vector)
>> {
>> if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
>> enable_8259A_irq(irq);
>> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/io_apic.c
>> --- a/xen/arch/x86/io_apic.c Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/arch/x86/io_apic.c Tue Aug 30 15:15:56 2011 +0100
>> @@ -1690,7 +1690,7 @@ static void mask_and_ack_level_ioapic_ir
>> }
>> }
>>
>> -static void end_level_ioapic_irq (unsigned int irq)
>> +static void end_level_ioapic_irq (unsigned int irq, u8 vector)
>> {
>> unsigned long v;
>> int i;
>> @@ -1739,6 +1739,14 @@ static void end_level_ioapic_irq (unsign
>> */
>> i = IO_APIC_VECTOR(irq);
>>
>> + /* Manually EOI the old vector if we are moving to the new */
>> + if ( vector && i != vector )
>> + {
>> + int ioapic;
>> + for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
>> + io_apic_eoi(ioapic, i);
> While I realize that the patch already went in, this still will need
> adjustment for dealing with old IO-APICs that don't have an EOI
> register (or if we want to stop supporting such, a clear panic()
> rather than subtle and hard to debug failure).
>
> Jan
>
This is a good point. However, due to the use of io_apic_eoi elsewhere
in the code, I don't think this is the only area susceptible to this issue.
I will add it to my todo list for the irq cleanup.
~Andrew
>> + }
>> +
>> v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
>>
>> ack_APIC_irq();
>> @@ -1762,7 +1770,10 @@ static void disable_edge_ioapic_irq(unsi
>> {
>> }
>>
>> -#define end_edge_ioapic_irq disable_edge_ioapic_irq
>> +static void end_edge_ioapic_irq(unsigned int irq, u8 vector)
>> +{
>> +}
>> +
>>
>> /*
>> * Level and edge triggered IO-APIC interrupts need different handling,
>> @@ -1811,7 +1822,7 @@ static void ack_msi_irq(unsigned int irq
>> ack_APIC_irq(); /* ACKTYPE_NONE */
>> }
>>
>> -static void end_msi_irq(unsigned int irq)
>> +static void end_msi_irq(unsigned int irq, u8 vector)
>> {
>> if ( !msi_maskable_irq(irq_desc[irq].msi_desc) )
>> ack_APIC_irq(); /* ACKTYPE_EOI */
>> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/irq.c
>> --- a/xen/arch/x86/irq.c Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/arch/x86/irq.c Tue Aug 30 15:15:56 2011 +0100
>> @@ -345,6 +345,7 @@ static void __do_IRQ_guest(int vector);
>> void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }
>>
>> static void enable_none(unsigned int vector) { }
>> +static void end_none(unsigned int irq, u8 vector) { }
>> static unsigned int startup_none(unsigned int vector) { return 0; }
>> static void disable_none(unsigned int vector) { }
>> static void ack_none(unsigned int irq)
>> @@ -353,7 +354,6 @@ static void ack_none(unsigned int irq)
>> }
>>
>> #define shutdown_none disable_none
>> -#define end_none enable_none
>>
>> hw_irq_controller no_irq_type = {
>> "none",
>> @@ -381,6 +381,7 @@ int __assign_irq_vector(int irq, struct
>> static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
>> unsigned int old_vector;
>> int cpu, err;
>> + unsigned long flags;
>> cpumask_t tmp_mask;
>>
>> old_vector = irq_to_vector(irq);
>> @@ -431,6 +432,7 @@ next:
>> /* Found one! */
>> current_vector = vector;
>> current_offset = offset;
>> + local_irq_save(flags);
>> if (old_vector) {
>> cfg->move_in_progress = 1;
>> cpus_copy(cfg->old_cpu_mask, cfg->cpu_mask);
>> @@ -450,6 +452,7 @@ next:
>> if (IO_APIC_IRQ(irq))
>> irq_vector[irq] = vector;
>> err = 0;
>> + local_irq_restore(flags);
>> break;
>> }
>> return err;
>> @@ -657,7 +660,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
>> desc->status &= ~IRQ_INPROGRESS;
>>
>> out:
>> - desc->handler->end(irq);
>> + desc->handler->end(irq, regs->entry_vector);
>> out_no_end:
>> spin_unlock(&desc->lock);
>> irq_exit();
>> @@ -857,7 +860,7 @@ static void irq_guest_eoi_timer_fn(void
>> switch ( action->ack_type )
>> {
>> case ACKTYPE_UNMASK:
>> - desc->handler->end(irq);
>> + desc->handler->end(irq, 0);
>> break;
>> case ACKTYPE_EOI:
>> cpu_eoi_map = action->cpu_eoi_map;
>> @@ -885,7 +888,7 @@ static void __do_IRQ_guest(int irq)
>> /* An interrupt may slip through while freeing an ACKTYPE_EOI irq.
>> */
>> ASSERT(action->ack_type == ACKTYPE_EOI);
>> ASSERT(desc->status & IRQ_DISABLED);
>> - desc->handler->end(irq);
>> + desc->handler->end(irq, vector);
>> return;
>> }
>>
>> @@ -1099,7 +1102,7 @@ static void flush_ready_eoi(void)
>> ASSERT(irq > 0);
>> desc = irq_to_desc(irq);
>> spin_lock(&desc->lock);
>> - desc->handler->end(irq);
>> + desc->handler->end(irq, peoi[sp].vector);
>> spin_unlock(&desc->lock);
>> }
>>
>> @@ -1177,7 +1180,7 @@ void desc_guest_eoi(struct irq_desc *des
>> if ( action->ack_type == ACKTYPE_UNMASK )
>> {
>> ASSERT(cpus_empty(action->cpu_eoi_map));
>> - desc->handler->end(irq);
>> + desc->handler->end(irq, 0);
>> spin_unlock_irq(&desc->lock);
>> return;
>> }
>> @@ -1431,7 +1434,7 @@ static irq_guest_action_t *__pirq_guest_
>> case ACKTYPE_UNMASK:
>> if ( test_and_clear_bool(pirq->masked) &&
>> (--action->in_flight == 0) )
>> - desc->handler->end(irq);
>> + desc->handler->end(irq, 0);
>> break;
>> case ACKTYPE_EOI:
>> /* NB. If #guests == 0 then we clear the eoi_map later on. */
>> diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/amd/iommu_init.c
>> --- a/xen/drivers/passthrough/amd/iommu_init.c Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Aug 30 15:15:56 2011 +0100
>> @@ -441,7 +441,7 @@ static unsigned int iommu_msi_startup(un
>> return 0;
>> }
>>
>> -static void iommu_msi_end(unsigned int irq)
>> +static void iommu_msi_end(unsigned int irq, u8 vector)
>> {
>> iommu_msi_unmask(irq);
>> ack_APIC_irq();
>> diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/vtd/iommu.c
>> --- a/xen/drivers/passthrough/vtd/iommu.c Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/drivers/passthrough/vtd/iommu.c Tue Aug 30 15:15:56 2011 +0100
>> @@ -971,7 +971,7 @@ static unsigned int dma_msi_startup(unsi
>> return 0;
>> }
>>
>> -static void dma_msi_end(unsigned int irq)
>> +static void dma_msi_end(unsigned int irq, u8 vector)
>> {
>> dma_msi_unmask(irq);
>> ack_APIC_irq();
>> diff -r 227130622561 -r a95fd5d03c20 xen/include/xen/irq.h
>> --- a/xen/include/xen/irq.h Thu Aug 25 12:03:14 2011 +0100
>> +++ b/xen/include/xen/irq.h Tue Aug 30 15:15:56 2011 +0100
>> @@ -44,7 +44,7 @@ struct hw_interrupt_type {
>> void (*enable)(unsigned int irq);
>> void (*disable)(unsigned int irq);
>> void (*ack)(unsigned int irq);
>> - void (*end)(unsigned int irq);
>> + void (*end)(unsigned int irq, u8 vector);
>> void (*set_affinity)(unsigned int irq, cpumask_t mask);
>> };
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@lists.xensource.com
>> http://lists.xensource.com/xen-devel
>
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] IRQ: manually EOI migrating line interrupts
2011-09-05 12:29 ` Andrew Cooper
@ 2011-09-05 12:50 ` Jan Beulich
0 siblings, 0 replies; 13+ messages in thread
From: Jan Beulich @ 2011-09-05 12:50 UTC (permalink / raw)
To: Andrew Cooper; +Cc: xen-devel@lists.xensource.com
>>> On 05.09.11 at 14:29, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>
> On 05/09/11 11:43, Jan Beulich wrote:
>>>>> On 30.08.11 at 16:17, Andrew Cooper <andrew.cooper3@citrix.com> wrote:
>>> When migrating IO-APIC line level interrupts between PCPUs, the
>>> migration code rewrites the IO-APIC entry to point to the new
>>> CPU/Vector before EOI'ing it.
>>>
>>> The EOI process says that EOI'ing the Local APIC will cause a
>>> broadcast with the vector number, which the IO-APIC must listen to to
>>> clear the IRR and Status bits.
>>>
>>> In the case of migrating, the IO-APIC has already been
>>> reprogrammed so the EOI broadcast with the old vector fails to match
>>> the new vector, leaving the IO-APIC with an outstanding vector,
>>> preventing any more use of that line interrupt. This causes a lockup
>>> especially when your root device is using PCI INTA (megaraid_sas
>>> driver *ehem*)
>>>
>>> However, the problem is mostly hidden because send_cleanup_vector()
>>> causes a cleanup of all moving vectors on the current PCPU in such a
>>> way which does not cause the problem, and if the problem has occured,
>>> the writes it makes to the IO-APIC clears the IRR and Status bits
>>> which unlocks the problem.
>>>
>>>
>>> This fix is distinctly a temporary hack, waiting on a cleanup of the
>>> irq code. It checks for the edge case where we have moved the irq,
>>> and manually EOI's the old vector with the IO-APIC which correctly
>>> clears the IRR and Status bits. Also, it protects the code which
>>> updates irq_cfg by disabling interrupts.
>>>
>>> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
>>>
>>> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/hpet.c
>>> --- a/xen/arch/x86/hpet.c Thu Aug 25 12:03:14 2011 +0100
>>> +++ b/xen/arch/x86/hpet.c Tue Aug 30 15:15:56 2011 +0100
>>> @@ -301,7 +301,7 @@ static void hpet_msi_ack(unsigned int ir
>>> ack_APIC_irq();
>>> }
>>>
>>> -static void hpet_msi_end(unsigned int irq)
>>> +static void hpet_msi_end(unsigned int irq, u8 vector)
>>> {
>>> }
>>>
>>> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/i8259.c
>>> --- a/xen/arch/x86/i8259.c Thu Aug 25 12:03:14 2011 +0100
>>> +++ b/xen/arch/x86/i8259.c Tue Aug 30 15:15:56 2011 +0100
>>> @@ -93,7 +93,7 @@ static unsigned int startup_8259A_irq(un
>>> return 0; /* never anything pending */
>>> }
>>>
>>> -static void end_8259A_irq(unsigned int irq)
>>> +static void end_8259A_irq(unsigned int irq, u8 vector)
>>> {
>>> if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
>>> enable_8259A_irq(irq);
>>> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/io_apic.c
>>> --- a/xen/arch/x86/io_apic.c Thu Aug 25 12:03:14 2011 +0100
>>> +++ b/xen/arch/x86/io_apic.c Tue Aug 30 15:15:56 2011 +0100
>>> @@ -1690,7 +1690,7 @@ static void mask_and_ack_level_ioapic_ir
>>> }
>>> }
>>>
>>> -static void end_level_ioapic_irq (unsigned int irq)
>>> +static void end_level_ioapic_irq (unsigned int irq, u8 vector)
>>> {
>>> unsigned long v;
>>> int i;
>>> @@ -1739,6 +1739,14 @@ static void end_level_ioapic_irq (unsign
>>> */
>>> i = IO_APIC_VECTOR(irq);
>>>
>>> + /* Manually EOI the old vector if we are moving to the new */
>>> + if ( vector && i != vector )
>>> + {
>>> + int ioapic;
>>> + for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
>>> + io_apic_eoi(ioapic, i);
>> While I realize that the patch already went in, this still will need
>> adjustment for dealing with old IO-APICs that don't have an EOI
>> register (or if we want to stop supporting such, a clear panic()
>> rather than subtle and hard to debug failure).
>>
>> Jan
>>
>
> This is a good point. However, due to the use of io_apic_eoi elsewhere
> in the code, I don't think this is the only area susceptible to this issue.
The only other two uses that I could spot are in directed-EOI specific
code (where a new enough IO-APIC can be implied) and in the code
recently added to clear remoteIRR bits (protected by a version check).
Jan
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2011-09-05 12:50 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-08-30 14:17 [PATCH] IRQ: manually EOI migrating line interrupts Andrew Cooper
2011-08-30 14:19 ` Andrew Cooper
2011-08-30 14:28 ` Andrew Cooper
2011-08-30 14:35 ` Keir Fraser
2011-08-30 15:03 ` Andrew Cooper
2011-08-30 14:38 ` Keir Fraser
2011-08-30 15:19 ` Andrew Cooper
2011-08-30 16:04 ` Keir Fraser
2011-09-05 10:50 ` Jan Beulich
2011-09-05 12:24 ` Andrew Cooper
2011-09-05 10:43 ` Jan Beulich
2011-09-05 12:29 ` Andrew Cooper
2011-09-05 12:50 ` Jan Beulich
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).