public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Michael Kelley <mhklinux@outlook.com>,
	Dmitry Ilvokhin <d@ilvokhin.com>, Radu Rendec <radu@rendec.net>,
	Jan Kiszka <jan.kiszka@siemens.com>,
	Kieran Bingham <kbingham@kernel.org>,
	Florian Fainelli <florian.fainelli@broadcom.com>,
	Marc Zyngier <maz@kernel.org>
Subject: [patch V4 00/15] Improve /proc/interrupts further
Date: Tue, 31 Mar 2026 09:25:24 +0200	[thread overview]
Message-ID: <20260331071453.172185305@kernel.org> (raw)

This is a follow up to v3 which can be found here:

  https://lore.kernel.org/20260326214345.019130211@kernel.org

The v1 cover letter contains a full analysis, explanation and numbers:

  https://lore.kernel.org/20260303150539.513068586@kernel.org

TLDR:

  - The performance of reading /proc/interrupts has been improved
    piecewise over the years, but most of the low-hanging fruit has been
    left on the table.

Changes vs. V3:

   - Reworked the NMI interrupt cleanup to be RT compatible. That was
     noticed when trying to address the next point.

   - Made the per-CPU and NMI type teardown paths update the "valid for
     proc" condition.

   - Fixed the brainfart of writing to __ro_after_init memory by separating
     the decision to emit a particular counter out into a simple bitmap and
     making the descriptor table const.

   - Updated the GDB script changes to the bitmap mechanism and removed the
     stale ERR/MIS prints which are no longer required.

   - Made irq_init_stats() a late initcall so it's guaranteed that the
     platform IPI and the posted MSI mechanisms have been initialized.

   - Added the AMD/HYGON deferred MCE vector to the conditionals and omitted
     it from emission when running on a CPU from other vendors.

Delta patch against v3 is below.

The series applies on top of v7.0-rc3 and is also available via git:

    git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git irq-proc-v4

Thanks,

	tglx
---
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 25d22e33e58c..194dfff84cb1 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -47,6 +47,4 @@ void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 #endif
 
-void irq_init_stats(void);
-
 #endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 0b3723cec0b9..de1c35fa5e75 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -76,7 +76,7 @@ struct irq_stat_info {
 #define IDS(idx, sym, txt) [IRQ_COUNT_##idx] =				\
 	{ .skip_vector = DEFAULT_SUPPRESSED_VECTOR, .symbol = sym, .text = txt }
 
-static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
+static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = {
 	ISS(NMI,			"NMI", "  Non-maskable interrupts\n"),
 #ifdef CONFIG_X86_LOCAL_APIC
 	ISS(APIC_TIMER,			"LOC", "  Local timer interrupts\n"),
@@ -128,26 +128,36 @@ static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
 #endif
 };
 
-void __init irq_init_stats(void)
+static DECLARE_BITMAP(irq_stat_count_show, IRQ_COUNT_MAX) __read_mostly;
+
+static int __init irq_init_stats(void)
 {
-	struct irq_stat_info *info = irq_stat_info;
+	const struct irq_stat_info *info = irq_stat_info;
 
 	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
-		if (info->skip_vector && info->skip_vector != DEFAULT_SUPPRESSED_VECTOR &&
-		    test_bit(info->skip_vector, system_vectors))
-			info->skip_vector = 0;
+		if (!info->skip_vector || (info->skip_vector != DEFAULT_SUPPRESSED_VECTOR &&
+					   test_bit(info->skip_vector, system_vectors)))
+			set_bit(i, irq_stat_count_show);
 	}
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (!x86_platform_ipi_callback)
-		irq_stat_info[IRQ_COUNT_X86_PLATFORM_IPI].skip_vector = 1;
+		clear_bit(IRQ_COUNT_X86_PLATFORM_IPI, irq_stat_count_show);
 #endif
 
 #ifdef CONFIG_X86_POSTED_MSI
 	if (!posted_msi_enabled())
-		irq_stat_info[IRQ_COUNT_POSTED_MSI_NOTIFICATION].skip_vector = 1;
+		clear_bit(IRQ_COUNT_POSTED_MSI_NOTIFICATION, irq_stat_count_show);
+#endif
+
+#ifdef CONFIG_X86_MCE_AMD
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+		clear_bit(IRQ_COUNT_DEFERRED_ERROR, irq_stat_count_show);
 #endif
+	return 0;
 }
+late_initcall(irq_init_stats);
 
 /*
  * Used for default enabled counters to increment the stats and to enable the
@@ -156,8 +166,7 @@ void __init irq_init_stats(void)
 void irq_stat_inc_and_enable(enum irq_stat_counts which)
 {
 	this_cpu_inc(irq_stat.counts[which]);
-	/* Pairs with the READ_ONCE() in arch_show_interrupts() */
-	WRITE_ONCE(irq_stat_info[which].skip_vector, 0);
+	set_bit(which, irq_stat_count_show);
 }
 
 #ifdef CONFIG_PROC_FS
@@ -169,7 +178,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	const struct irq_stat_info *info = irq_stat_info;
 
 	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
-		if (READ_ONCE(info->skip_vector))
+		if (!test_bit(i, irq_stat_count_show))
 			continue;
 
 		seq_printf(p, "%*s:", prec, info->symbol);
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 325c0ad8fb9c..6ab9eac64670 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -104,8 +104,6 @@ void __init native_init_IRQ(void)
 	if (!cpu_feature_enabled(X86_FEATURE_FRED))
 		idt_setup_apic_and_irq_gates();
 
-	irq_init_stats();
-
 	lapic_assign_system_vectors();
 
 	if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) {
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c4ebd17233f3..ae8b06e01948 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2028,24 +2028,32 @@ const void *free_irq(unsigned int irq, void *dev_id)
 }
 EXPORT_SYMBOL(free_irq);
 
-/* This function must be called with desc->lock held */
 static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
 {
+	struct irqaction *action = NULL;
 	const char *devname = NULL;
 
-	desc->istate &= ~IRQS_NMI;
+	scoped_guard(raw_spinlock_irqsave, &desc->lock) {
+		irq_nmi_teardown(desc);
 
-	if (!WARN_ON(desc->action == NULL)) {
-		irq_pm_remove_action(desc, desc->action);
-		devname = desc->action->name;
-		unregister_handler_proc(irq, desc->action);
+		desc->istate &= ~IRQS_NMI;
 
-		kfree(desc->action);
+		if (!WARN_ON(desc->action == NULL)) {
+			action = desc->action;
+			irq_pm_remove_action(desc, action);
+			devname = action->name;
+		}
 		desc->action = NULL;
+
+		irq_settings_clr_disable_unlazy(desc);
+		irq_shutdown_and_deactivate(desc);
 	}
 
-	irq_settings_clr_disable_unlazy(desc);
-	irq_shutdown_and_deactivate(desc);
+	irq_proc_update_valid(desc);
+
+	if (action)
+		unregister_handler_proc(irq, action);
+	kfree(action);
 
 	irq_release_resources(desc);
 
@@ -2058,6 +2066,7 @@ static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
 const void *free_nmi(unsigned int irq, void *dev_id)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
+	void *ret;
 
 	if (!desc || WARN_ON(!irq_is_nmi(desc)))
 		return NULL;
@@ -2069,8 +2078,6 @@ const void *free_nmi(unsigned int irq, void *dev_id)
 	if (WARN_ON(desc->depth == 0))
 		disable_nmi_nosync(irq);
 
-	guard(raw_spinlock_irqsave)(&desc->lock);
-	irq_nmi_teardown(desc);
 	return __cleanup_nmi(irq, desc);
 }
 
@@ -2320,13 +2327,14 @@ int request_nmi(unsigned int irq, irq_handler_t handler,
 		/* Setup NMI state */
 		desc->istate |= IRQS_NMI;
 		retval = irq_nmi_setup(desc);
-		if (retval) {
-			__cleanup_nmi(irq, desc);
-			return -EINVAL;
-		}
-		return 0;
 	}
 
+	if (retval) {
+		__cleanup_nmi(irq, desc);
+		return -EINVAL;
+	}
+	return 0;
+
 err_irq_setup:
 	irq_chip_pm_put(&desc->irq_data);
 err_out:
@@ -2430,8 +2438,10 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_
 		*action_ptr = action->next;
 
 		/* Demote from NMI if we killed the last action */
-		if (!desc->action)
+		if (!desc->action) {
 			desc->istate &= ~IRQS_NMI;
+			irq_proc_update_valid(desc);
+		}
 	}
 
 	unregister_handler_proc(irq, action);
diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py
index 6ca7e32f35b0..418f3ece2f0f 100644
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -90,13 +90,6 @@ def show_irq_desc(prec, chip_width, irq):
 
     return text
 
-def show_irq_err_count(prec):
-    cnt = utils.gdb_eval_or_none("irq_err_count")
-    text = ""
-    if cnt is not None:
-        text += "%*s: %10u\n" % (prec, "ERR", cnt['counter'])
-    return text
-
 def x86_show_irqstat(prec, pfx, idx, desc):
     irq_stat = gdb.parse_and_eval("&irq_stat.counts[%d]" %idx)
     text = "%*s: " % (prec, pfx)
@@ -109,22 +102,18 @@ def x86_show_irqstat(prec, pfx, idx, desc):
 def x86_show_interupts(prec):
     info_type = gdb.lookup_type('struct irq_stat_info')
     info = gdb.parse_and_eval('irq_stat_info')
+    bitmap = gdb.parse_and_eval('irq_stat_count_show')
+    nbits = 8 * int(bitmap.type.sizeof)
 
     text = ""
     for idx in range(int(info.type.sizeof / info_type.sizeof)):
-        if info[idx]['skip_vector']:
+        show = bitmap[idx / nbits]
+        if not show & 1 << (idx % nbits):
             continue
         pfx = info[idx]['symbol'].string()
         desc = info[idx]['text'].string()
         text += x86_show_irqstat(prec, pfx, idx, desc)
 
-    text += show_irq_err_count(prec)
-
-    if constants.LX_CONFIG_X86_IO_APIC:
-        cnt = utils.gdb_eval_or_none("irq_mis_count")
-        if cnt is not None:
-            text += "%*s: %10u\n" % (prec, "MIS", cnt['counter'])
-
     return text
 
 def arm_common_show_interrupts(prec):



             reply	other threads:[~2026-03-31  7:25 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-31  7:25 Thomas Gleixner [this message]
2026-03-31  7:25 ` [patch V4 01/15] x86/irq: Optimize interrupts decimals printing Thomas Gleixner
2026-03-31  7:25 ` [patch V4 02/15] genirq/proc: Avoid formatting zero counts in /proc/interrupts Thomas Gleixner
2026-03-31  7:25 ` [patch V4 03/15] genirq/proc: Utilize irq_desc::tot_count to avoid evaluation Thomas Gleixner
2026-03-31  7:25 ` [patch V4 04/15] x86/irq: Make irqstats array based Thomas Gleixner
2026-03-31 14:45   ` Thomas Gleixner
2026-03-31  7:25 ` [patch V4 05/15] x86/irq: Suppress unlikely interrupt stats by default Thomas Gleixner
2026-03-31 21:12   ` Radu Rendec
2026-03-31  7:25 ` [patch V4 06/15] x86/irq: Move IOAPIC misrouted and PIC/APIC error counts into irq_stats Thomas Gleixner
2026-03-31 21:40   ` Radu Rendec
2026-03-31  7:26 ` [patch V4 07/15] scripts/gdb: Update x86 interrupts to the array based storage Thomas Gleixner
2026-03-31 14:47   ` Thomas Gleixner
2026-03-31  7:26 ` [patch V4 08/15] genirq: Expose nr_irqs in core code Thomas Gleixner
2026-03-31  7:26 ` [patch V4 09/15] genirq/manage: Make NMI cleanup RT safe Thomas Gleixner
2026-03-31  7:26 ` [patch V4 10/15] genirq: Cache the condition for /proc/interrupts exposure Thomas Gleixner
2026-03-31  7:26 ` [patch V4 11/15] genirq: Calculate precision only when required Thomas Gleixner
2026-03-31  7:26 ` [patch V4 12/15] genirq: Add rcuref count to struct irq_desc Thomas Gleixner
2026-03-31  7:26 ` [patch V4 13/15] genirq: Expose irq_find_desc_at_or_after() in core code Thomas Gleixner
2026-03-31 13:48   ` Dmitry Ilvokhin
2026-03-31  7:26 ` [patch V4 14/15] genirq/proc: Runtime size the chip name Thomas Gleixner
2026-03-31 14:22   ` Dmitry Ilvokhin
2026-03-31  7:26 ` [patch V4 15/15] genirq/proc: Speed up /proc/interrupts iteration Thomas Gleixner
2026-03-31 16:04   ` Dmitry Ilvokhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260331071453.172185305@kernel.org \
    --to=tglx@kernel.org \
    --cc=d@ilvokhin.com \
    --cc=florian.fainelli@broadcom.com \
    --cc=jan.kiszka@siemens.com \
    --cc=kbingham@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=mhklinux@outlook.com \
    --cc=radu@rendec.net \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox