All of lore.kernel.org
 help / color / mirror / Atom feed
From: Thomas Gleixner <tglx@kernel.org>
To: LKML <linux-kernel@vger.kernel.org>
Cc: x86@kernel.org, Michael Kelley <mhklinux@outlook.com>,
	Dmitry Ilvokhin <d@ilvokhin.com>, Radu Rendec <radu@rendec.net>,
	Jan Kiszka <jan.kiszka@siemens.com>,
	Kieran Bingham <kbingham@kernel.org>,
	Florian Fainelli <florian.fainelli@broadcom.com>,
	Marc Zyngier <maz@kernel.org>
Subject: [patch V4 00/15] Improve /proc/interrupts further
Date: Tue, 31 Mar 2026 09:25:24 +0200	[thread overview]
Message-ID: <20260331071453.172185305@kernel.org> (raw)

This is a follow up to v3 which can be found here:

  https://lore.kernel.org/20260326214345.019130211@kernel.org

The v1 cover letter contains a full analysis, explanation and numbers:

  https://lore.kernel.org/20260303150539.513068586@kernel.org

TLDR:

  - The performance of reading /proc/interrupts has been improved
    piecewise over the years, but most of the low hanging fruit has been
    left on the table.

Changes vs. V3:

   - Reworked the NMI interrupt cleanup to be RT compatible. That was
     noticed when trying to address the next point.

   - Made per-CPU and NMI type teardown update the valid-for-proc condition

   - Fixed the brainfart of writing to __ro_after_init memory by separating
     the decision to emit a particular counter out into a simple bitmap and
     making the descriptor table const.

   - Updated the GDB script changes to the bitmap mechanism and removed the
     stale ERR/MIS prints which are no longer required.

   - Made irq_init_stats() a late initcall so it's guaranteed that the
     platform IPI and the posted MSI mechanisms have been initialized.

   - Add the AMD/HYGON deferred MCE vector to the conditionals and omit it
     from emission when running on a CPU from other vendors

Delta patch against v3 is below.

The series applies on top of v7.0-rc3 and is also available via git:

    git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git irq-proc-v4

Thanks,

	tglx
---
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 25d22e33e58c..194dfff84cb1 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -47,6 +47,4 @@ void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
 #endif
 
-void irq_init_stats(void);
-
 #endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 0b3723cec0b9..de1c35fa5e75 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -76,7 +76,7 @@ struct irq_stat_info {
 #define IDS(idx, sym, txt) [IRQ_COUNT_##idx] =				\
 	{ .skip_vector = DEFAULT_SUPPRESSED_VECTOR, .symbol = sym, .text = txt }
 
-static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
+static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = {
 	ISS(NMI,			"NMI", "  Non-maskable interrupts\n"),
 #ifdef CONFIG_X86_LOCAL_APIC
 	ISS(APIC_TIMER,			"LOC", "  Local timer interrupts\n"),
@@ -128,26 +128,36 @@ static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
 #endif
 };
 
-void __init irq_init_stats(void)
+static DECLARE_BITMAP(irq_stat_count_show, IRQ_COUNT_MAX) __read_mostly;
+
+static int __init irq_init_stats(void)
 {
-	struct irq_stat_info *info = irq_stat_info;
+	const struct irq_stat_info *info = irq_stat_info;
 
 	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
-		if (info->skip_vector && info->skip_vector != DEFAULT_SUPPRESSED_VECTOR &&
-		    test_bit(info->skip_vector, system_vectors))
-			info->skip_vector = 0;
+		if (!info->skip_vector || (info->skip_vector != DEFAULT_SUPPRESSED_VECTOR &&
+					   test_bit(info->skip_vector, system_vectors)))
+			set_bit(i, irq_stat_count_show);
 	}
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	if (!x86_platform_ipi_callback)
-		irq_stat_info[IRQ_COUNT_X86_PLATFORM_IPI].skip_vector = 1;
+		clear_bit(IRQ_COUNT_X86_PLATFORM_IPI, irq_stat_count_show);
 #endif
 
 #ifdef CONFIG_X86_POSTED_MSI
 	if (!posted_msi_enabled())
-		irq_stat_info[IRQ_COUNT_POSTED_MSI_NOTIFICATION].skip_vector = 1;
+		clear_bit(IRQ_COUNT_POSTED_MSI_NOTIFICATION, irq_stat_count_show);
+#endif
+
+#ifdef CONFIG_X86_MCE_AMD
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+	    boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+		clear_bit(IRQ_COUNT_DEFERRED_ERROR, irq_stat_count_show);
 #endif
+	return 0;
 }
+late_initcall(irq_init_stats);
 
 /*
  * Used for default enabled counters to increment the stats and to enable the
@@ -156,8 +166,7 @@ void __init irq_init_stats(void)
 void irq_stat_inc_and_enable(enum irq_stat_counts which)
 {
 	this_cpu_inc(irq_stat.counts[which]);
-	/* Pairs with the READ_ONCE() in arch_show_interrupts() */
-	WRITE_ONCE(irq_stat_info[which].skip_vector, 0);
+	set_bit(which, irq_stat_count_show);
 }
 
 #ifdef CONFIG_PROC_FS
@@ -169,7 +178,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	const struct irq_stat_info *info = irq_stat_info;
 
 	for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
-		if (READ_ONCE(info->skip_vector))
+		if (!test_bit(i, irq_stat_count_show))
 			continue;
 
 		seq_printf(p, "%*s:", prec, info->symbol);
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 325c0ad8fb9c..6ab9eac64670 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -104,8 +104,6 @@ void __init native_init_IRQ(void)
 	if (!cpu_feature_enabled(X86_FEATURE_FRED))
 		idt_setup_apic_and_irq_gates();
 
-	irq_init_stats();
-
 	lapic_assign_system_vectors();
 
 	if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) {
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index c4ebd17233f3..ae8b06e01948 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -2028,24 +2028,32 @@ const void *free_irq(unsigned int irq, void *dev_id)
 }
 EXPORT_SYMBOL(free_irq);
 
-/* This function must be called with desc->lock held */
 static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
 {
+	struct irqaction *action = NULL;
 	const char *devname = NULL;
 
-	desc->istate &= ~IRQS_NMI;
+	scoped_guard(raw_spinlock_irqsave, &desc->lock) {
+		irq_nmi_teardown(desc);
 
-	if (!WARN_ON(desc->action == NULL)) {
-		irq_pm_remove_action(desc, desc->action);
-		devname = desc->action->name;
-		unregister_handler_proc(irq, desc->action);
+		desc->istate &= ~IRQS_NMI;
 
-		kfree(desc->action);
+		if (!WARN_ON(desc->action == NULL)) {
+			action = desc->action;
+			irq_pm_remove_action(desc, action);
+			devname = action->name;
+		}
 		desc->action = NULL;
+
+		irq_settings_clr_disable_unlazy(desc);
+		irq_shutdown_and_deactivate(desc);
 	}
 
-	irq_settings_clr_disable_unlazy(desc);
-	irq_shutdown_and_deactivate(desc);
+	irq_proc_update_valid(desc);
+
+	if (action)
+		unregister_handler_proc(irq, action);
+	kfree(action);
 
 	irq_release_resources(desc);
 
@@ -2058,6 +2066,7 @@ static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
 const void *free_nmi(unsigned int irq, void *dev_id)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
+	void *ret;
 
 	if (!desc || WARN_ON(!irq_is_nmi(desc)))
 		return NULL;
@@ -2069,8 +2078,6 @@ const void *free_nmi(unsigned int irq, void *dev_id)
 	if (WARN_ON(desc->depth == 0))
 		disable_nmi_nosync(irq);
 
-	guard(raw_spinlock_irqsave)(&desc->lock);
-	irq_nmi_teardown(desc);
 	return __cleanup_nmi(irq, desc);
 }
 
@@ -2320,13 +2327,14 @@ int request_nmi(unsigned int irq, irq_handler_t handler,
 		/* Setup NMI state */
 		desc->istate |= IRQS_NMI;
 		retval = irq_nmi_setup(desc);
-		if (retval) {
-			__cleanup_nmi(irq, desc);
-			return -EINVAL;
-		}
-		return 0;
 	}
 
+	if (retval) {
+		__cleanup_nmi(irq, desc);
+		return -EINVAL;
+	}
+	return 0;
+
 err_irq_setup:
 	irq_chip_pm_put(&desc->irq_data);
 err_out:
@@ -2430,8 +2438,10 @@ static struct irqaction *__free_percpu_irq(unsigned int irq, void __percpu *dev_
 		*action_ptr = action->next;
 
 		/* Demote from NMI if we killed the last action */
-		if (!desc->action)
+		if (!desc->action) {
 			desc->istate &= ~IRQS_NMI;
+			irq_proc_update_valid(desc);
+		}
 	}
 
 	unregister_handler_proc(irq, action);
diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py
index 6ca7e32f35b0..418f3ece2f0f 100644
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -90,13 +90,6 @@ def show_irq_desc(prec, chip_width, irq):
 
     return text
 
-def show_irq_err_count(prec):
-    cnt = utils.gdb_eval_or_none("irq_err_count")
-    text = ""
-    if cnt is not None:
-        text += "%*s: %10u\n" % (prec, "ERR", cnt['counter'])
-    return text
-
 def x86_show_irqstat(prec, pfx, idx, desc):
     irq_stat = gdb.parse_and_eval("&irq_stat.counts[%d]" %idx)
     text = "%*s: " % (prec, pfx)
@@ -109,22 +102,18 @@ def x86_show_irqstat(prec, pfx, idx, desc):
 def x86_show_interupts(prec):
     info_type = gdb.lookup_type('struct irq_stat_info')
     info = gdb.parse_and_eval('irq_stat_info')
+    bitmap = gdb.parse_and_eval('irq_stat_count_show')
+    nbits = 8 * int(bitmap.type.sizeof)
 
     text = ""
     for idx in range(int(info.type.sizeof / info_type.sizeof)):
-        if info[idx]['skip_vector']:
+        show = bitmap[idx / nbits]
+        if not show & 1 << (idx % nbits):
             continue
         pfx = info[idx]['symbol'].string()
         desc = info[idx]['text'].string()
         text += x86_show_irqstat(prec, pfx, idx, desc)
 
-    text += show_irq_err_count(prec)
-
-    if constants.LX_CONFIG_X86_IO_APIC:
-        cnt = utils.gdb_eval_or_none("irq_mis_count")
-        if cnt is not None:
-            text += "%*s: %10u\n" % (prec, "MIS", cnt['counter'])
-
     return text
 
 def arm_common_show_interrupts(prec):



             reply	other threads:[~2026-03-31  7:25 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-31  7:25 Thomas Gleixner [this message]
2026-03-31  7:25 ` [patch V4 01/15] x86/irq: Optimize interrupts decimals printing Thomas Gleixner
2026-03-31  7:25 ` [patch V4 02/15] genirq/proc: Avoid formatting zero counts in /proc/interrupts Thomas Gleixner
2026-03-31  7:25 ` [patch V4 03/15] genirq/proc: Utilize irq_desc::tot_count to avoid evaluation Thomas Gleixner
2026-03-31  7:25 ` [patch V4 04/15] x86/irq: Make irqstats array based Thomas Gleixner
2026-03-31 14:45   ` Thomas Gleixner
2026-03-31  7:25 ` [patch V4 05/15] x86/irq: Suppress unlikely interrupt stats by default Thomas Gleixner
2026-03-31 21:12   ` Radu Rendec
2026-03-31  7:25 ` [patch V4 06/15] x86/irq: Move IOAPIC misrouted and PIC/APIC error counts into irq_stats Thomas Gleixner
2026-03-31 21:40   ` Radu Rendec
2026-03-31  7:26 ` [patch V4 07/15] scripts/gdb: Update x86 interrupts to the array based storage Thomas Gleixner
2026-03-31 14:47   ` Thomas Gleixner
2026-03-31  7:26 ` [patch V4 08/15] genirq: Expose nr_irqs in core code Thomas Gleixner
2026-03-31  7:26 ` [patch V4 09/15] genirq/manage: Make NMI cleanup RT safe Thomas Gleixner
2026-03-31  7:26 ` [patch V4 10/15] genirq: Cache the condition for /proc/interrupts exposure Thomas Gleixner
2026-03-31  7:26 ` [patch V4 11/15] genirq: Calculate precision only when required Thomas Gleixner
2026-03-31  7:26 ` [patch V4 12/15] genirq: Add rcuref count to struct irq_desc Thomas Gleixner
2026-03-31  7:26 ` [patch V4 13/15] genirq: Expose irq_find_desc_at_or_after() in core code Thomas Gleixner
2026-03-31 13:48   ` Dmitry Ilvokhin
2026-03-31  7:26 ` [patch V4 14/15] genirq/proc: Runtime size the chip name Thomas Gleixner
2026-03-31 14:22   ` Dmitry Ilvokhin
2026-03-31  7:26 ` [patch V4 15/15] genirq/proc: Speed up /proc/interrupts iteration Thomas Gleixner
2026-03-31 16:04   ` Dmitry Ilvokhin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260331071453.172185305@kernel.org \
    --to=tglx@kernel.org \
    --cc=d@ilvokhin.com \
    --cc=florian.fainelli@broadcom.com \
    --cc=jan.kiszka@siemens.com \
    --cc=kbingham@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=maz@kernel.org \
    --cc=mhklinux@outlook.com \
    --cc=radu@rendec.net \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.