* [PATCH 1/2] IRQ: fix performance regression on large IA64 systems
@ 2009-07-03 9:01 Jiri Slaby
2009-07-03 9:01 ` [PATCH 2/2] IRQ: remove irqfixup MODULE_PARM_DESC Jiri Slaby
` (2 more replies)
0 siblings, 3 replies; 5+ messages in thread
From: Jiri Slaby @ 2009-07-03 9:01 UTC (permalink / raw)
To: mingo; +Cc: tglx, hpa, x86, yinghai, linux-kernel, Jiri Slaby, Bernhard Walle
Commit b60c1f6ffd88850079ae419aa933ab0eddbd5535
(drop note_interrupt() for per-CPU for proper scaling) removed call to
note_interrupt() in __do_IRQ(). Commit
d85a60d85ea5b7c597508c1510c88e657773d378
(Add IRQF_IRQPOLL flag (common code)) added it again, because it's needed
for irqpoll.
This patch now introduces a new parameter 'only_fixup' for
note_interrupt(). This parameter determines two cases:
TRUE => The function should be only executed when irqfixup is set.
Either 'irqpoll' or 'irqfixup' directly set that.
FALSE => Just the behaviour as note_interrupt() always had.
Now the patch converts all calls of note_interrupt() to only_fixup=FALSE,
except the call that has been removed by b60c1f6ffd.
So that call is always done, but the body is only executed when either
'irqpoll' or 'irqfixup' are specified.
This is needed because __do_IRQ() calls note_interrupt() to record IRQ
statistics. It ends up creating serious cache line contention,
enough that a 1024p system live locks under the crushing weight of the
timer tick.
The note_interrupt() call modifies fields in the irq_desc_t structure.
For PER_CPU timer interrupts (on ia64 machines) this causes cacheline
contention.
Systems with 1024 processors take an extremely long time to boot up, as
most of the time is spent attempting to service timer interrupts. With
noirqdebug added to the boot line, the system boots in close to the normal
amount of time.
Based on Bernhard's patch.
Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Bernhard Walle <bernhard.walle@gmx.de>
---
arch/arm/mach-ns9xxx/irq.c | 2 +-
arch/powerpc/platforms/cell/interrupt.c | 2 +-
drivers/mfd/ezx-pcap.c | 2 +-
drivers/mfd/twl4030-irq.c | 2 +-
include/linux/irq.h | 2 +-
kernel/irq/chip.c | 10 +++++-----
kernel/irq/handle.c | 4 ++--
kernel/irq/spurious.c | 10 +++++++++-
8 files changed, 21 insertions(+), 13 deletions(-)
diff --git a/arch/arm/mach-ns9xxx/irq.c b/arch/arm/mach-ns9xxx/irq.c
index feb0e54..f559a75 100644
--- a/arch/arm/mach-ns9xxx/irq.c
+++ b/arch/arm/mach-ns9xxx/irq.c
@@ -85,7 +85,7 @@ static void handle_prio_irq(unsigned int irq, struct irq_desc *desc)
/* XXX: There is no direct way to access noirqdebug, so check
* unconditionally for spurious irqs...
* Maybe this function should go to kernel/irq/chip.c? */
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
diff --git a/arch/powerpc/platforms/cell/interrupt.c b/arch/powerpc/platforms/cell/interrupt.c
index 882e470..3742589 100644
--- a/arch/powerpc/platforms/cell/interrupt.c
+++ b/arch/powerpc/platforms/cell/interrupt.c
@@ -268,7 +268,7 @@ static void handle_iic_irq(unsigned int irq, struct irq_desc *desc)
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
spin_lock(&desc->lock);
} while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
diff --git a/drivers/mfd/ezx-pcap.c b/drivers/mfd/ezx-pcap.c
index c1de4af..5007ff0 100644
--- a/drivers/mfd/ezx-pcap.c
+++ b/drivers/mfd/ezx-pcap.c
@@ -176,7 +176,7 @@ static void pcap_isr_work(struct work_struct *work)
break;
if (desc->status & IRQ_DISABLED)
- note_interrupt(irq, desc, IRQ_NONE);
+ note_interrupt(irq, desc, IRQ_NONE, false);
else
desc->handle_irq(irq, desc);
}
diff --git a/drivers/mfd/twl4030-irq.c b/drivers/mfd/twl4030-irq.c
index bae61b2..e00e598 100644
--- a/drivers/mfd/twl4030-irq.c
+++ b/drivers/mfd/twl4030-irq.c
@@ -233,7 +233,7 @@ static int twl4030_irq_thread(void *data)
*/
if (d->status & IRQ_DISABLED)
note_interrupt(module_irq, d,
- IRQ_NONE);
+ IRQ_NONE, false);
else
d->handle_irq(module_irq, d);
}
diff --git a/include/linux/irq.h b/include/linux/irq.h
index cb2e77a..69707ff 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -322,7 +322,7 @@ static inline void generic_handle_irq(unsigned int irq)
/* Handling of unhandled and spurious interrupts: */
extern void note_interrupt(unsigned int irq, struct irq_desc *desc,
- irqreturn_t action_ret);
+ irqreturn_t action_ret, bool only_fixup);
/* Resending of interrupts :*/
void check_irq_resend(struct irq_desc *desc, unsigned int irq);
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 13c68e7..0e54062 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -333,7 +333,7 @@ handle_simple_irq(unsigned int irq, struct irq_desc *desc)
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
@@ -378,7 +378,7 @@ handle_level_irq(unsigned int irq, struct irq_desc *desc)
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
@@ -431,7 +431,7 @@ handle_fasteoi_irq(unsigned int irq, struct irq_desc *desc)
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
spin_lock(&desc->lock);
desc->status &= ~IRQ_INPROGRESS;
@@ -509,7 +509,7 @@ handle_edge_irq(unsigned int irq, struct irq_desc *desc)
spin_unlock(&desc->lock);
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
spin_lock(&desc->lock);
} while ((desc->status & (IRQ_PENDING | IRQ_DISABLED)) == IRQ_PENDING);
@@ -538,7 +538,7 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc)
action_ret = handle_IRQ_event(irq, desc->action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
if (desc->chip->eoi)
desc->chip->eoi(irq);
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 065205b..b535926 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -463,7 +463,7 @@ unsigned int __do_IRQ(unsigned int irq)
if (likely(!(desc->status & IRQ_DISABLED))) {
action_ret = handle_IRQ_event(irq, desc->action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, true);
}
desc->chip->end(irq);
return 1;
@@ -517,7 +517,7 @@ unsigned int __do_IRQ(unsigned int irq)
action_ret = handle_IRQ_event(irq, action);
if (!noirqdebug)
- note_interrupt(irq, desc, action_ret);
+ note_interrupt(irq, desc, action_ret, false);
spin_lock(&desc->lock);
if (likely(!(desc->status & IRQ_PENDING)))
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 4d56829..c0463b9 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -223,9 +223,17 @@ try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
return action && (action->flags & IRQF_IRQPOLL);
}
+/*
+ * The parameter "only_fixup" means that the function should be only executed
+ * if this parameter is set either to false or to true simultaneously with
+ * irqfixup enabled.
+ */
void note_interrupt(unsigned int irq, struct irq_desc *desc,
- irqreturn_t action_ret)
+ irqreturn_t action_ret, bool only_fixup)
{
+ if (only_fixup && irqfixup == 0)
+ return;
+
if (unlikely(action_ret != IRQ_HANDLED)) {
/*
* If we are seeing only the odd spurious IRQ caused by
--
1.6.3.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/2] IRQ: remove irqfixup MODULE_PARM_DESC
2009-07-03 9:01 [PATCH 1/2] IRQ: fix performance regression on large IA64 systems Jiri Slaby
@ 2009-07-03 9:01 ` Jiri Slaby
2009-07-04 10:26 ` [PATCH 1/2] IRQ: fix performance regression on large IA64 systems Ingo Molnar
2009-07-05 19:26 ` Thomas Gleixner
2 siblings, 0 replies; 5+ messages in thread
From: Jiri Slaby @ 2009-07-03 9:01 UTC (permalink / raw)
To: mingo; +Cc: tglx, hpa, x86, yinghai, linux-kernel, Jiri Slaby
It's wrong and compiled-time wiped away anyway.
Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
---
kernel/irq/spurious.c | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index c0463b9..0162ebf 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -305,7 +305,6 @@ static int __init irqfixup_setup(char *str)
__setup("irqfixup", irqfixup_setup);
module_param(irqfixup, int, 0644);
-MODULE_PARM_DESC("irqfixup", "0: No fixup, 1: irqfixup mode, 2: irqpoll mode");
static int __init irqpoll_setup(char *str)
{
--
1.6.3.2
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] IRQ: fix performance regression on large IA64 systems
2009-07-03 9:01 [PATCH 1/2] IRQ: fix performance regression on large IA64 systems Jiri Slaby
2009-07-03 9:01 ` [PATCH 2/2] IRQ: remove irqfixup MODULE_PARM_DESC Jiri Slaby
@ 2009-07-04 10:26 ` Ingo Molnar
2009-07-05 19:26 ` Thomas Gleixner
2 siblings, 0 replies; 5+ messages in thread
From: Ingo Molnar @ 2009-07-04 10:26 UTC (permalink / raw)
To: Jiri Slaby; +Cc: mingo, tglx, hpa, x86, yinghai, linux-kernel, Bernhard Walle
* Jiri Slaby <jirislaby@gmail.com> wrote:
> Commit b60c1f6ffd88850079ae419aa933ab0eddbd5535
> (drop note_interrupt() for per-CPU for proper scaling) removed call to
> note_interrupt() in __do_IRQ(). Commit
> d85a60d85ea5b7c597508c1510c88e657773d378
> (Add IRQF_IRQPOLL flag (common code)) added it again, because it's needed
> for irqpoll.
>
> This patch now introduces a new parameter 'only_fixup' for
> note_interrupt(). This parameter determines two cases:
>
> TRUE => The function should be only executed when irqfixup is set.
> Either 'irqpoll' or 'irqfixup' directly set that.
>
> FALSE => Just the behaviour as note_interrupt() always had.
>
> Now the patch converts all calls of note_interrupt() to
> only_fixup=FALSE, except the call that has been removed by
> b60c1f6ffd. So that call is always done, but the body is only
> executed when either 'irqpoll' or 'irqfixup' are specified.
>
> This is needed because __do_IRQ() calls note_interrupt() to record
> IRQ statistics. It ends up creating serious cache line contention,
> enough that a 1024p system live locks under the crushing weight of
> the timer tick.
>
> The note_interrupt() call modifies fields in the irq_desc_t
> structure. For PER_CPU timer interrupts (on ia64 machines) this
> causes cacheline contention.
>
> Systems with 1024 processors take an extremely long time to boot
> up, as most of the time is spent attempting to service timer
> interrupts. With noirqdebug added to the boot line, the system
> boots in close to the normal amount of time.
What would be the effect/cost of enabling this by default? Seems
desirable and eventually we'll hit those problems with regular
systems too ...
Ingo
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] IRQ: fix performance regression on large IA64 systems
2009-07-03 9:01 [PATCH 1/2] IRQ: fix performance regression on large IA64 systems Jiri Slaby
2009-07-03 9:01 ` [PATCH 2/2] IRQ: remove irqfixup MODULE_PARM_DESC Jiri Slaby
2009-07-04 10:26 ` [PATCH 1/2] IRQ: fix performance regression on large IA64 systems Ingo Molnar
@ 2009-07-05 19:26 ` Thomas Gleixner
2009-07-30 22:22 ` Jiri Slaby
2 siblings, 1 reply; 5+ messages in thread
From: Thomas Gleixner @ 2009-07-05 19:26 UTC (permalink / raw)
To: Jiri Slaby; +Cc: mingo, hpa, x86, yinghai, linux-kernel, Bernhard Walle
On Fri, 3 Jul 2009, Jiri Slaby wrote:
> Commit b60c1f6ffd88850079ae419aa933ab0eddbd5535
> (drop note_interrupt() for per-CPU for proper scaling) removed call to
> note_interrupt() in __do_IRQ(). Commit
> d85a60d85ea5b7c597508c1510c88e657773d378
> (Add IRQF_IRQPOLL flag (common code)) added it again, because it's needed
> for irqpoll.
>
> This patch now introduces a new parameter 'only_fixup' for
> note_interrupt(). This parameter determines two cases:
>
> TRUE => The function should be only executed when irqfixup is set.
> Either 'irqpoll' or 'irqfixup' directly set that.
>
> FALSE => Just the behaviour as note_interrupt() always had.
>
> Now the patch converts all calls of note_interrupt() to only_fixup=FALSE,
> except the call that has been removed by b60c1f6ffd.
> So that call is always done, but the body is only executed when either
> 'irqpoll' or 'irqfixup' are specified.
>
> This is needed because __do_IRQ() calls note_interrupt() to record IRQ
> statistics. It ends up creating serious cache line contention,
> enough that a 1024p system live locks under the crushing weight of the
> timer tick.
>
> The note_interrupt() call modifies fields in the irq_desc_t structure.
> For PER_CPU timer interrupts (on ia64 machines) this causes cacheline
> contention.
>
> Systems with 1024 processors take an extremely long time to boot up, as
> most of the time is spent attempting to service timer interrupts. With
> noirqdebug added to the boot line, the system boots in close to the normal
> amount of time.
Hmm. I'm not really happy about that patch. It's all about percpu
interrupts which happen to have the same irq number. I think the
correct thing to do is to use the handle_percpu_irq() handler and
modify handle_percpu_irq to call note_interrupt() only when the return
value of the action handler is IRQ_NONE. Otherwise we can leave
everything untouched.
Thanks,
tglx
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH 1/2] IRQ: fix performance regression on large IA64 systems
2009-07-05 19:26 ` Thomas Gleixner
@ 2009-07-30 22:22 ` Jiri Slaby
0 siblings, 0 replies; 5+ messages in thread
From: Jiri Slaby @ 2009-07-30 22:22 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: mingo, hpa, x86, yinghai, linux-kernel, Bernhard Walle
On 07/05/2009 09:26 PM, Thomas Gleixner wrote:
> On Fri, 3 Jul 2009, Jiri Slaby wrote:
>> Commit b60c1f6ffd88850079ae419aa933ab0eddbd5535
>> (drop note_interrupt() for per-CPU for proper scaling) removed call to
>> note_interrupt() in __do_IRQ(). Commit
>> d85a60d85ea5b7c597508c1510c88e657773d378
>> (Add IRQF_IRQPOLL flag (common code)) added it again, because it's needed
>> for irqpoll.
>>
>> This patch now introduces a new parameter 'only_fixup' for
>> note_interrupt(). This parameter determines two cases:
...
> Hmm. I'm not really happy about that patch. It's all about percpu
> interrupts which happen to have the same irq number. I think the
> correct thing to do is to use the handle_percpu_irq() handler and
> modify handle_percpu_irq to call note_interrupt() only when the return
> value of the action handler is IRQ_NONE. Otherwise we can leave
> everything untouched.
Sorry for the delay, I waited for test results. I have the patch
attached and the boot still lasts very long. Do you see any obvious
mistake which I don't see? (It's 2.6.27-based, therefore no irq_to_desc.)
Thanks.
---
arch/ia64/kernel/irq_ia64.c | 9 ++++-----
kernel/irq/chip.c | 2 +-
2 files changed, 5 insertions(+), 6 deletions(-)
--- a/arch/ia64/kernel/irq_ia64.c
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -624,14 +624,13 @@ static struct irqaction tlb_irqaction =
void
ia64_native_register_percpu_irq (ia64_vector vec, struct irqaction *action)
{
- irq_desc_t *desc;
- unsigned int irq;
+ unsigned int irq = vec;
+ irq_desc_t *desc = irq_desc + irq;
- irq = vec;
BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL));
- desc = irq_desc + irq;
desc->status |= IRQ_PER_CPU;
- desc->chip = &irq_type_ia64_lsapic;
+ set_irq_chip_and_handler(irq, &irq_type_ia64_lsapic,
+ &handle_percpu_irq);
if (action)
setup_irq(irq, action);
}
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -531,7 +531,7 @@ handle_percpu_irq(unsigned int irq, stru
desc->chip->ack(irq);
action_ret = handle_IRQ_event(irq, desc->action);
- if (!noirqdebug)
+ if (!noirqdebug && action_ret == IRQ_NONE)
note_interrupt(irq, desc, action_ret);
if (desc->chip->eoi)
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2009-07-30 22:30 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-03 9:01 [PATCH 1/2] IRQ: fix performance regression on large IA64 systems Jiri Slaby
2009-07-03 9:01 ` [PATCH 2/2] IRQ: remove irqfixup MODULE_PARM_DESC Jiri Slaby
2009-07-04 10:26 ` [PATCH 1/2] IRQ: fix performance regression on large IA64 systems Ingo Molnar
2009-07-05 19:26 ` Thomas Gleixner
2009-07-30 22:22 ` Jiri Slaby
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox