From: Andrew Cooper <andrew.cooper3@citrix.com>
To: "xen-devel@lists.xensource.com" <xen-devel@lists.xensource.com>,
Keir Fraser <keir.xen@gmail.com>
Subject: Re: [PATCH] IRQ: manually EOI migrating line interrupts
Date: Tue, 30 Aug 2011 15:28:35 +0100 [thread overview]
Message-ID: <4E5CF393.10903@citrix.com> (raw)
In-Reply-To: <a95fd5d03c2071369d85.1314713862@andrewcoop.uk.xensource.com>
On 30/08/11 15:17, Andrew Cooper wrote:
> When migrating IO-APIC line level interrupts between PCPUs, the
> migration code rewrites the IO-APIC entry to point to the new
> CPU/Vector before EOI'ing it.
>
> The EOI process says that EOI'ing the Local APIC will cause a
> broadcast with the vector number, which the IO-APIC must listen to to
> clear the IRR and Status bits.
>
> In the case of migrating, the IO-APIC has already been
> reprogrammed so the EOI broadcast with the old vector fails to match
> the new vector, leaving the IO-APIC with an outstanding vector,
> preventing any more use of that line interrupt. This causes a lockup
> especially when your root device is using PCI INTA (megaraid_sas
> driver *ehem*)
>
> However, the problem is mostly hidden because send_cleanup_vector()
> causes a cleanup of all moving vectors on the current PCPU in such a
> way which does not cause the problem, and if the problem has occured,
> the writes it makes to the IO-APIC clears the IRR and Status bits
> which unlocks the problem.
>
>
> This fix is distinctly a temporary hack, waiting on a cleanup of the
> irq code. It checks for the edge case where we have moved the irq,
> and manually EOI's the old vector with the IO-APIC which correctly
> clears the IRR and Status bits. Also, it protects the code which
> updates irq_cfg by disabling interrupts.
>
> Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
P.S.
Because this fix actually works, it means that EOI'ing the IO-APIC is
keyed only on vector, and not the target CPU. As a result, having two
different interrupts in the IO-APIC with the same vector will result in
problems when trying to EOI one of them.
I will address this problem as well in my IRQ cleanup, as I cant see any
checks for it currently.
~Andrew
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/hpet.c
> --- a/xen/arch/x86/hpet.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/hpet.c Tue Aug 30 15:15:56 2011 +0100
> @@ -301,7 +301,7 @@ static void hpet_msi_ack(unsigned int ir
> ack_APIC_irq();
> }
>
> -static void hpet_msi_end(unsigned int irq)
> +static void hpet_msi_end(unsigned int irq, u8 vector)
> {
> }
>
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/i8259.c
> --- a/xen/arch/x86/i8259.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/i8259.c Tue Aug 30 15:15:56 2011 +0100
> @@ -93,7 +93,7 @@ static unsigned int startup_8259A_irq(un
> return 0; /* never anything pending */
> }
>
> -static void end_8259A_irq(unsigned int irq)
> +static void end_8259A_irq(unsigned int irq, u8 vector)
> {
> if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS)))
> enable_8259A_irq(irq);
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/io_apic.c
> --- a/xen/arch/x86/io_apic.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/io_apic.c Tue Aug 30 15:15:56 2011 +0100
> @@ -1690,7 +1690,7 @@ static void mask_and_ack_level_ioapic_ir
> }
> }
>
> -static void end_level_ioapic_irq (unsigned int irq)
> +static void end_level_ioapic_irq (unsigned int irq, u8 vector)
> {
> unsigned long v;
> int i;
> @@ -1739,6 +1739,14 @@ static void end_level_ioapic_irq (unsign
> */
> i = IO_APIC_VECTOR(irq);
>
> + /* Manually EOI the old vector if we are moving to the new */
> + if ( vector && i != vector )
> + {
> + int ioapic;
> + for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
> + io_apic_eoi(ioapic, i);
> + }
> +
> v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
>
> ack_APIC_irq();
> @@ -1762,7 +1770,10 @@ static void disable_edge_ioapic_irq(unsi
> {
> }
>
> -#define end_edge_ioapic_irq disable_edge_ioapic_irq
> +static void end_edge_ioapic_irq(unsigned int irq, u8 vector)
> +{
> +}
> +
>
> /*
> * Level and edge triggered IO-APIC interrupts need different handling,
> @@ -1811,7 +1822,7 @@ static void ack_msi_irq(unsigned int irq
> ack_APIC_irq(); /* ACKTYPE_NONE */
> }
>
> -static void end_msi_irq(unsigned int irq)
> +static void end_msi_irq(unsigned int irq, u8 vector)
> {
> if ( !msi_maskable_irq(irq_desc[irq].msi_desc) )
> ack_APIC_irq(); /* ACKTYPE_EOI */
> diff -r 227130622561 -r a95fd5d03c20 xen/arch/x86/irq.c
> --- a/xen/arch/x86/irq.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/arch/x86/irq.c Tue Aug 30 15:15:56 2011 +0100
> @@ -345,6 +345,7 @@ static void __do_IRQ_guest(int vector);
> void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs) { }
>
> static void enable_none(unsigned int vector) { }
> +static void end_none(unsigned int irq, u8 vector) { }
> static unsigned int startup_none(unsigned int vector) { return 0; }
> static void disable_none(unsigned int vector) { }
> static void ack_none(unsigned int irq)
> @@ -353,7 +354,6 @@ static void ack_none(unsigned int irq)
> }
>
> #define shutdown_none disable_none
> -#define end_none enable_none
>
> hw_irq_controller no_irq_type = {
> "none",
> @@ -381,6 +381,7 @@ int __assign_irq_vector(int irq, struct
> static int current_vector = FIRST_DYNAMIC_VECTOR, current_offset = 0;
> unsigned int old_vector;
> int cpu, err;
> + unsigned long flags;
> cpumask_t tmp_mask;
>
> old_vector = irq_to_vector(irq);
> @@ -431,6 +432,7 @@ next:
> /* Found one! */
> current_vector = vector;
> current_offset = offset;
> + local_irq_save(flags);
> if (old_vector) {
> cfg->move_in_progress = 1;
> cpus_copy(cfg->old_cpu_mask, cfg->cpu_mask);
> @@ -450,6 +452,7 @@ next:
> if (IO_APIC_IRQ(irq))
> irq_vector[irq] = vector;
> err = 0;
> + local_irq_restore(flags);
> break;
> }
> return err;
> @@ -657,7 +660,7 @@ asmlinkage void do_IRQ(struct cpu_user_r
> desc->status &= ~IRQ_INPROGRESS;
>
> out:
> - desc->handler->end(irq);
> + desc->handler->end(irq, regs->entry_vector);
> out_no_end:
> spin_unlock(&desc->lock);
> irq_exit();
> @@ -857,7 +860,7 @@ static void irq_guest_eoi_timer_fn(void
> switch ( action->ack_type )
> {
> case ACKTYPE_UNMASK:
> - desc->handler->end(irq);
> + desc->handler->end(irq, 0);
> break;
> case ACKTYPE_EOI:
> cpu_eoi_map = action->cpu_eoi_map;
> @@ -885,7 +888,7 @@ static void __do_IRQ_guest(int irq)
> /* An interrupt may slip through while freeing an ACKTYPE_EOI irq. */
> ASSERT(action->ack_type == ACKTYPE_EOI);
> ASSERT(desc->status & IRQ_DISABLED);
> - desc->handler->end(irq);
> + desc->handler->end(irq, vector);
> return;
> }
>
> @@ -1099,7 +1102,7 @@ static void flush_ready_eoi(void)
> ASSERT(irq > 0);
> desc = irq_to_desc(irq);
> spin_lock(&desc->lock);
> - desc->handler->end(irq);
> + desc->handler->end(irq, peoi[sp].vector);
> spin_unlock(&desc->lock);
> }
>
> @@ -1177,7 +1180,7 @@ void desc_guest_eoi(struct irq_desc *des
> if ( action->ack_type == ACKTYPE_UNMASK )
> {
> ASSERT(cpus_empty(action->cpu_eoi_map));
> - desc->handler->end(irq);
> + desc->handler->end(irq, 0);
> spin_unlock_irq(&desc->lock);
> return;
> }
> @@ -1431,7 +1434,7 @@ static irq_guest_action_t *__pirq_guest_
> case ACKTYPE_UNMASK:
> if ( test_and_clear_bool(pirq->masked) &&
> (--action->in_flight == 0) )
> - desc->handler->end(irq);
> + desc->handler->end(irq, 0);
> break;
> case ACKTYPE_EOI:
> /* NB. If #guests == 0 then we clear the eoi_map later on. */
> diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/amd/iommu_init.c
> --- a/xen/drivers/passthrough/amd/iommu_init.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/drivers/passthrough/amd/iommu_init.c Tue Aug 30 15:15:56 2011 +0100
> @@ -441,7 +441,7 @@ static unsigned int iommu_msi_startup(un
> return 0;
> }
>
> -static void iommu_msi_end(unsigned int irq)
> +static void iommu_msi_end(unsigned int irq, u8 vector)
> {
> iommu_msi_unmask(irq);
> ack_APIC_irq();
> diff -r 227130622561 -r a95fd5d03c20 xen/drivers/passthrough/vtd/iommu.c
> --- a/xen/drivers/passthrough/vtd/iommu.c Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/drivers/passthrough/vtd/iommu.c Tue Aug 30 15:15:56 2011 +0100
> @@ -971,7 +971,7 @@ static unsigned int dma_msi_startup(unsi
> return 0;
> }
>
> -static void dma_msi_end(unsigned int irq)
> +static void dma_msi_end(unsigned int irq, u8 vector)
> {
> dma_msi_unmask(irq);
> ack_APIC_irq();
> diff -r 227130622561 -r a95fd5d03c20 xen/include/xen/irq.h
> --- a/xen/include/xen/irq.h Thu Aug 25 12:03:14 2011 +0100
> +++ b/xen/include/xen/irq.h Tue Aug 30 15:15:56 2011 +0100
> @@ -44,7 +44,7 @@ struct hw_interrupt_type {
> void (*enable)(unsigned int irq);
> void (*disable)(unsigned int irq);
> void (*ack)(unsigned int irq);
> - void (*end)(unsigned int irq);
> + void (*end)(unsigned int irq, u8 vector);
> void (*set_affinity)(unsigned int irq, cpumask_t mask);
> };
>
--
Andrew Cooper - Dom0 Kernel Engineer, Citrix XenServer
T: +44 (0)1223 225 900, http://www.citrix.com
next prev parent reply other threads:[~2011-08-30 14:28 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-08-30 14:17 [PATCH] IRQ: manually EOI migrating line interrupts Andrew Cooper
2011-08-30 14:19 ` Andrew Cooper
2011-08-30 14:28 ` Andrew Cooper [this message]
2011-08-30 14:35 ` Keir Fraser
2011-08-30 15:03 ` Andrew Cooper
2011-08-30 14:38 ` Keir Fraser
2011-08-30 15:19 ` Andrew Cooper
2011-08-30 16:04 ` Keir Fraser
2011-09-05 10:50 ` Jan Beulich
2011-09-05 12:24 ` Andrew Cooper
2011-09-05 10:43 ` Jan Beulich
2011-09-05 12:29 ` Andrew Cooper
2011-09-05 12:50 ` Jan Beulich
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4E5CF393.10903@citrix.com \
--to=andrew.cooper3@citrix.com \
--cc=keir.xen@gmail.com \
--cc=xen-devel@lists.xensource.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).