All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] interrupt: discover and disable very frequent interrupts
@ 2022-09-30  6:40 Zhang Xincheng
  2022-09-30  7:05 ` Hans de Goede
                   ` (2 more replies)
  0 siblings, 3 replies; 13+ messages in thread
From: Zhang Xincheng @ 2022-09-30  6:40 UTC (permalink / raw)
  To: tglx
  Cc: linux-kernel, maz, oleksandr, hdegoede, bigeasy, mark.rutland,
	michael, zhangxincheng

From: zhangxincheng <zhangxincheng@uniontech.com>

In some cases, a peripheral's interrupt will be triggered frequently,
which will keep the CPU processing the interrupt and eventually cause
the RCU to report rcu_sched self-detected stall on the CPU.

[  838.131628] rcu: INFO: rcu_sched self-detected stall on CPU
[  838.137189] rcu:     0-....: (194839 ticks this GP) idle=f02/1/0x4000000000000004
softirq=9993/9993 fqs=97428
[  838.146912] rcu:      (t=195015 jiffies g=6773 q=0)
[  838.151516] Task dump for CPU 0:
[  838.154730] systemd-sleep   R  running task        0  3445      1 0x0000000a

Signed-off-by: zhangxincheng <zhangxincheng@uniontech.com>
Change-Id: I9c92146f2772eae383c16c8c10de028b91e07150
Signed-off-by: zhangxincheng <zhangxincheng@uniontech.com>
---
 include/linux/irqdesc.h |  2 ++
 kernel/irq/spurious.c   | 52 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 1cd4e36890fb..a3bd521c3557 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -102,6 +102,8 @@ struct irq_desc {
 	int			parent_irq;
 	struct module		*owner;
 	const char		*name;
+	u32 gap_count;
+	u64 gap_time;
 } ____cacheline_internodealigned_in_smp;
 
 #ifdef CONFIG_SPARSE_IRQ
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 02b2daf07441..b7162a10d92c 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -222,6 +222,38 @@ static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 }
 
+/*
+ * Some bad hardware will trigger interrupts very frequently, which will
+ * cause the CPU to process hardware interrupts all the time. So when
+ * we find this out, the interrupt should be disabled.
+ */
+static void __report_so_frequently_irq(struct irq_desc *desc, irqreturn_t action_ret)
+{
+	unsigned int irq = irq_desc_get_irq(desc);
+	struct irqaction *action;
+	unsigned long flags;
+
+	printk(KERN_ERR "irq %d: triggered too frequently\n",irq);
+	dump_stack();
+	printk(KERN_ERR "handlers:\n");
+
+	/*
+	 * We need to take desc->lock here. note_interrupt() is called
+	 * w/o desc->lock held, but IRQ_PROGRESS set. We might race
+	 * with something else removing an action. It's ok to take
+	 * desc->lock here. See synchronize_irq().
+	 */
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	for_each_action_of_desc(desc, action) {
+		printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler);
+		if (action->thread_fn)
+			printk(KERN_CONT " threaded [<%p>] %pf",
+					action->thread_fn, action->thread_fn);
+		printk(KERN_CONT "\n");
+	}
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+}
+
 static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
 {
 	static int count = 100;
@@ -273,6 +305,26 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
 {
 	unsigned int irq;
 
+	if((desc->gap_count & 0xffff0000) == 0)
+		desc->gap_time = get_jiffies_64();
+
+	desc->gap_count ++;
+
+	if((desc->gap_count & 0x0000ffff) >= 2000) {
+		if((get_jiffies_64() - desc->gap_time) < HZ) {
+			desc->gap_count += 0x00010000;
+			desc->gap_count &= 0xffff0000;
+		} else {
+			desc->gap_count = 0;
+		}
+
+		if((desc->gap_count >> 16) > 30) {
+			__report_so_frequently_irq(desc, action_ret);
+			irq_disable(desc);
+		}
+	}
+
+
 	if (desc->istate & IRQS_POLL_INPROGRESS ||
 	    irq_settings_is_polled(desc))
 		return;
-- 
2.20.1


^ permalink raw reply related	[flat|nested] 13+ messages in thread
* Re: [PATCH] interrupt: discover and disable very frequent interrupts
@ 2022-09-30  8:31 张鑫城
  0 siblings, 0 replies; 13+ messages in thread
From: 张鑫城 @ 2022-09-30  8:31 UTC (permalink / raw)
  To: hdegoede, tglx
  Cc: linux-kernel, maz, oleksandr, bigeasy, mark.rutland, michael

Hi,

Thank you very much for your valuable suggestions, I have modified the patch as follows:

Subject: [PATCH] interrupt: discover and disable very frequent interrupts

In some cases, a peripheral's interrupt will be triggered frequently,
which will keep the CPU processing the interrupt and eventually cause
the RCU to report rcu_sched self-detected stall on the CPU.

[  838.131628] rcu: INFO: rcu_sched self-detected stall on CPU
[  838.137189] rcu:     0-....: (194839 ticks this GP) idle=f02/1/0x4000000000000004
softirq=9993/9993 fqs=97428
[  838.146912] rcu:      (t=195015 jiffies g=6773 q=0)
[  838.151516] Task dump for CPU 0:
[  838.154730] systemd-sleep   R  running task        0  3445      1 0x0000000a

Signed-off-by: zhangxincheng <zhangxincheng@uniontech.com>
Change-Id: I9c92146f2772eae383c16c8c10de028b91e07150
---
 include/linux/irqdesc.h |  2 ++
 kernel/irq/spurious.c   | 36 +++++++++++++++++++++++++++++-------
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index 1cd4e36890fb..a3bd521c3557 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -102,6 +102,8 @@ struct irq_desc {
 	int			parent_irq;
 	struct module		*owner;
 	const char		*name;
+	u32 gap_count;
+	u64 gap_time;
 } ____cacheline_internodealigned_in_smp;
 
 #ifdef CONFIG_SPARSE_IRQ
diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c
index 02b2daf07441..75bd0088446a 100644
--- a/kernel/irq/spurious.c
+++ b/kernel/irq/spurious.c
@@ -188,19 +188,21 @@ static inline int bad_action_ret(irqreturn_t action_ret)
  *
  * (The other 100-of-100,000 interrupts may have been a correctly
  *  functioning device sharing an IRQ with the failing one)
+ *
+ * Some bad hardware will trigger interrupts very frequently, which will
+ * cause the CPU to process hardware interrupts all the time. So when
+ * we find this out, the interrupt should be disabled.
  */
-static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
+static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret, const char *msg)
 {
 	unsigned int irq = irq_desc_get_irq(desc);
 	struct irqaction *action;
 	unsigned long flags;
 
 	if (bad_action_ret(action_ret)) {
-		printk(KERN_ERR "irq event %d: bogus return value %x\n",
-				irq, action_ret);
+		printk(msg, irq, action_ret);
 	} else {
-		printk(KERN_ERR "irq %d: nobody cared (try booting with "
-				"the \"irqpoll\" option)\n", irq);
+		printk(msg, irq);
 	}
 	dump_stack();
 	printk(KERN_ERR "handlers:\n");
@@ -228,7 +230,7 @@ static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
 
 	if (count > 0) {
 		count--;
-		__report_bad_irq(desc, action_ret);
+		__report_bad_irq(desc, action_ret, KERN_ERR "irq event %d: bogus return value %x\n");
 	}
 }
 
@@ -282,6 +284,25 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
 		return;
 	}
 
+	if((desc->gap_count & 0xffff0000) == 0)
+		desc->gap_time = get_jiffies_64();
+
+	desc->gap_count ++;
+
+	if((desc->gap_count & 0x0000ffff) >= 2000) {
+		if((get_jiffies_64() - desc->gap_time) < HZ) {
+			desc->gap_count += 0x00010000;
+			desc->gap_count &= 0xffff0000;
+		} else {
+			desc->gap_count = 0;
+		}
+
+		if((desc->gap_count >> 16) > 30) {
+		__report_bad_irq(desc, action_ret, KERN_ERR "irq %d: triggered too frequently\n");
+			irq_disable(desc);
+		}
+	}
+
 	/*
 	 * We cannot call note_interrupt from the threaded handler
 	 * because we need to look at the compound of all handlers
@@ -416,7 +437,8 @@ void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
 		/*
 		 * The interrupt is stuck
 		 */
-		__report_bad_irq(desc, action_ret);
+		__report_bad_irq(desc, action_ret, KERN_ERR "irq %d: nobody cared (try booting"
+				"with the \"irqpoll\" option)\n");
 		/*
 		 * Now kill the IRQ
 		 */
--
2.20.1


Regards,

Zhang Xincheng

^ permalink raw reply related	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2022-10-18  1:07 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-09-30  6:40 [PATCH] interrupt: discover and disable very frequent interrupts Zhang Xincheng
2022-09-30  7:05 ` Hans de Goede
2022-09-30  9:23 ` Marc Zyngier
2022-09-30  9:57   ` Zhang Xincheng
2022-09-30 10:37     ` Marc Zyngier
2022-10-09  1:31       ` Zhang Xincheng
2022-10-09  2:13         ` Marc Zyngier
2022-10-09 10:02           ` Zhang Xincheng
2022-10-17 11:21             ` Thomas Gleixner
2022-10-18  1:05               ` Zhang Xincheng
2022-10-08  5:21 ` [interrupt] 998288b7e8: RIP:cpuidle_enter_state kernel test robot
2022-10-08  5:21   ` kernel test robot
  -- strict thread matches above, loose matches on Subject: below --
2022-09-30  8:31 [PATCH] interrupt: discover and disable very frequent interrupts 张鑫城

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.