* [patch v2 00/14] Improve /proc/interrupts further and add a binary interface
@ 2026-03-20 13:21 Thomas Gleixner
2026-03-20 13:21 ` [patch v2 01/14] x86/irq: Optimize interrupts decimals printing Thomas Gleixner
` (14 more replies)
0 siblings, 15 replies; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:21 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
This is a follow up to v1 which can be found here:
https://lore.kernel.org/20260303150539.513068586@kernel.org
The v1 cover letter contains a full analysis, explanation and numbers.
TLDR:
- The performance of reading of /proc/interrupts has been improved
piecewise over the years, but most of the low hanging fruit has been
left on the table.
- For a long time a binary readout interface was considered to be the
better option, but it never materialized.
The series fixes the real big performance issues and provides a design
study for a binary interface.
Changes vs. V1:
- Addressed the review comments from Dmitry and Michael
- Slightly changed the approach to skip x86 vectors
- Picked up tags where appropriate
The last four patches related to the binary interface need obviously some
thought vs. the interface and are therefore still marked RFC.
Delta patch against v1 is below.
The series applies on top of v7.0-rc3 and is also available via git:
git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git irq/core
Thanks,
tglx
---
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index b0186f22a19e..544e6b7bb499 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -17,8 +17,8 @@ enum {
#ifdef CONFIG_SMP
IRQ_COUNT_RESCHEDULE,
IRQ_COUNT_CALL_FUNCTION,
- IRQ_COUNT_TLB,
#endif
+ IRQ_COUNT_TLB,
#ifdef CONFIG_X86_THERMAL_VECTOR
IRQ_COUNT_THERMAL_APIC,
#endif
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h
index 194dfff84cb1..25d22e33e58c 100644
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -47,4 +47,6 @@ void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
#endif
+void irq_init_stats(void);
+
#endif /* _ASM_X86_IRQ_H */
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 6af2c622bfb1..8f8485a8abdf 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -191,7 +191,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_stimer0)
{
struct pt_regs *old_regs = set_irq_regs(regs);
- inc_irq_stat(HYPERV_STIMER0)
+ inc_irq_stat(HYPERV_STIMER0);
if (hv_stimer0_handler)
hv_stimer0_handler();
add_interrupt_randomness(HYPERV_STIMER0_VECTOR);
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index bfa189e5a3b6..2bd8c08f8d91 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -63,7 +63,7 @@ void ack_bad_irq(unsigned int irq)
}
struct irq_stat_info {
- unsigned int test_vector;
+ unsigned int skip_vector;
const char *symbol;
const char *text;
};
@@ -71,9 +71,9 @@ struct irq_stat_info {
#define ISS(idx, sym, txt) [IRQ_COUNT_##idx] = { .symbol = sym, .text = txt }
#define ITS(idx, sym, txt) [IRQ_COUNT_##idx] = \
- { .test_vector = idx## _VECTOR, .symbol = sym, .text = txt }
+ { .skip_vector = idx## _VECTOR, .symbol = sym, .text = txt }
-static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = {
+static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
ISS(NMI, "NMI", " Non-maskable interrupts\n"),
#ifdef CONFIG_X86_LOCAL_APIC
ISS(APIC_TIMER, "LOC", " Local timer interrupts\n"),
@@ -86,8 +86,8 @@ static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = {
#ifdef CONFIG_SMP
ISS(RESCHEDULE, "RES", " Rescheduling interrupts\n"),
ISS(CALL_FUNCTION, "CAL", " Function call interrupts\n"),
- ISS(TLB, "TLB", " TLB shootdowns\n"),
#endif
+ ISS(TLB, "TLB", " TLB shootdowns\n"),
#ifdef CONFIG_X86_THERMAL_VECTOR
ISS(THERMAL_APIC, "TRM", " Thermal event interrupt\n"),
#endif
@@ -105,7 +105,7 @@ static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = {
ITS(HYPERVISOR_CALLBACK, "HYP", " Hypervisor callback interrupts\n"),
#endif
#if IS_ENABLED(CONFIG_HYPERV)
- ITS(HYPERV_REENLIGHTMENT, "HRE", " Hyper-V reenlightment interrupts\n"),
+ ITS(HYPERV_REENLIGHTENMENT, "HRE", " Hyper-V reenlightment interrupts\n"),
ITS(HYPERV_STIMER0, "HVS", " Hyper-V stimer0 interrupts\n"),
#endif
#if IS_ENABLED(CONFIG_KVM)
@@ -121,6 +121,24 @@ static const struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] = {
#endif
};
+void __init irq_init_stats(void)
+{
+ struct irq_stat_info *info = irq_stat_info;
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
+ if (info->skip_vector && test_bit(info->skip_vector, system_vectors))
+ info->skip_vector = 0;
+ }
+
+ if (!x86_platform_ipi_callback)
+ irq_stat_info[IRQ_COUNT_X86_PLATFORM_IPI].skip_vector = 1;
+
+#ifdef CONFIG_X86_POSTED_MSI
+ if (!posted_msi_enabled())
+ irq_stat_info[IRQ_COUNT_X86_POSTED_MSI].skip_vector = 1;
+#endif
+}
+
/*
* /proc/interrupts printing for arch specific interrupts
*/
@@ -129,7 +147,7 @@ int arch_show_interrupts(struct seq_file *p, int prec)
const struct irq_stat_info *info = irq_stat_info;
for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
- if (info->test_vector && !test_bit(info->test_vector, system_vectors))
+ if (info->skip_vector)
continue;
seq_printf(p, "%*s:", prec, info->symbol);
@@ -137,9 +155,9 @@ int arch_show_interrupts(struct seq_file *p, int prec)
seq_puts(p, info->text);
}
- seq_printf(p, "ERR: %10u\n", (unsigned int) atomic_read(&irq_err_count));
+ seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
if (IS_ENABLED(CONFIG_X86_IO_APIC))
- seq_printf(p, "MIS: %10u\n", (unsigned int) atomic_read(&irq_mis_count));
+ seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
return 0;
}
@@ -245,7 +263,7 @@ DEFINE_IDTENTRY_IRQ(common_interrupt)
#ifdef CONFIG_X86_LOCAL_APIC
/* Function pointer for generic interrupt vector handling */
-void (*x86_platform_ipi_callback)(void) = NULL;
+void (*x86_platform_ipi_callback)(void) __ro_after_init = NULL;
/*
* Handler for X86_PLATFORM_IPI_VECTOR.
*/
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 6ab9eac64670..325c0ad8fb9c 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -104,6 +104,8 @@ void __init native_init_IRQ(void)
if (!cpu_feature_enabled(X86_FEATURE_FRED))
idt_setup_apic_and_irq_gates();
+ irq_init_stats();
+
lapic_assign_system_vectors();
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) {
diff --git a/include/linux/irq.h b/include/linux/irq.h
index b44c90aabe53..9d0929b1feaf 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -99,7 +99,7 @@ enum {
IRQ_DISABLE_UNLAZY = (1 << 19),
IRQ_HIDDEN = (1 << 20),
IRQ_NO_DEBUG = (1 << 21),
- IRQF_RESERVED = (1 << 22),
+ IRQ_RESERVED = (1 << 22),
};
#define IRQF_MODIFY_MASK \
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 3859cef24d58..37eec0337867 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -139,7 +139,6 @@ extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc);
extern void register_handler_proc(unsigned int irq, struct irqaction *action);
extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
void irq_proc_update_valid(struct irq_desc *desc);
-void irq_proc_calc_prec(void);
#else
static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { }
static inline void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) { }
@@ -148,6 +147,11 @@ static inline void register_handler_proc(unsigned int irq,
static inline void unregister_handler_proc(unsigned int irq,
struct irqaction *action) { }
static inline void irq_proc_update_valid(struct irq_desc *desc) { }
+#endif
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_GENERIC_IRQ_SHOW)
+void irq_proc_calc_prec(void);
+#else
static inline void irq_proc_calc_prec(void) { }
#endif
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index 9f83e2bc86cf..084c84ca457d 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -477,7 +477,7 @@ void irq_proc_calc_prec(void)
}
#define ZSTR1 " 0"
-#define ZSTR1_LEN 11
+#define ZSTR1_LEN (sizeof(ZSTR1) - 1)
#define ZSTR16 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 \
ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1
#define ZSTR256 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 \
diff --git a/kernel/irq/settings.h b/kernel/irq/settings.h
index d15410b4a089..0a0c027a5d34 100644
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -18,6 +18,7 @@ enum {
_IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY,
_IRQ_HIDDEN = IRQ_HIDDEN,
_IRQ_NO_DEBUG = IRQ_NO_DEBUG,
+ _IRQ_PROC_VALID = IRQ_RESERVED,
_IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
};
@@ -34,12 +35,10 @@ enum {
#define IRQ_DISABLE_UNLAZY GOT_YOU_MORON
#define IRQ_HIDDEN GOT_YOU_MORON
#define IRQ_NO_DEBUG GOT_YOU_MORON
+#define IRQ_RESERVED GOT_YOU_MORON
#undef IRQF_MODIFY_MASK
#define IRQF_MODIFY_MASK GOT_YOU_MORON
-#define _IRQ_PROC_VALID IRQF_RESERVED
-#undef IRQF_RESERVED
-
static inline void
irq_settings_clr_and_set(struct irq_desc *desc, u32 clr, u32 set)
{
^ permalink raw reply related [flat|nested] 43+ messages in thread
* [patch v2 01/14] x86/irq: Optimize interrupts decimals printing
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
@ 2026-03-20 13:21 ` Thomas Gleixner
2026-03-21 16:10 ` Radu Rendec
2026-03-20 13:21 ` [patch v2 02/14] genirq/proc: Avoid formatting zero counts in /proc/interrupts Thomas Gleixner
` (13 subsequent siblings)
14 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:21 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
From: Dmitry Ilvokhin <d@ilvokhin.com>
Monitoring tools periodically scan /proc/interrupts to export metrics as a
timeseries for future analysis and investigation.
In large fleets, /proc/interrupts is polled (often every few seconds) on
every machine. The cumulative overhead adds up quickly across thousands
of nodes, so reducing the cost of generating these stats does have a
measurable operational impact. With the ongoing trend toward higher core
counts per machine, this cost becomes even more noticeable over time,
since interrupt counters are per-CPU. In Meta's fleet, we have observed
this overhead at scale.
Although a binary /proc interface would be a better long-term solution
due to lower formatting (kernel side) and parsing (userspace side)
overhead, the text interface will remain in use for some time, even if
better solutions will be available. Optimizing the /proc/interrupts
printing code is therefore still beneficial.
Function seq_printf() supports a rich format string for printing decimals,
but that is not required for printing the /proc/interrupts per-CPU
counters. The seq_put_decimal_ull_width() function can be used instead to
print the per-CPU counters, because only very limited formatting is
required in this case. A similar optimization is already used in
show_interrupts().
Performance counter stats (truncated) for 'sh -c cat /proc/interrupts':
Before:
3.42 msec task-clock # 0.802 CPUs utilized ( +- 0.05% )
1 context-switches # 291.991 /sec ( +- 0.74% )
0 cpu-migrations # 0.000 /sec
343 page-faults # 100.153 K/sec ( +- 0.01% )
8,932,242 instructions # 1.66 insn per cycle ( +- 0.34% )
5,374,427 cycles # 1.569 GHz ( +- 0.04% )
1,483,154 branches # 433.068 M/sec ( +- 0.22% )
28,768 branch-misses # 1.94% of all branches ( +- 0.31% )
0.00427182 +- 0.00000215 seconds time elapsed ( +- 0.05% )
After:
2.39 msec task-clock # 0.796 CPUs utilized ( +- 0.06% )
1 context-switches # 418.541 /sec ( +- 0.70% )
0 cpu-migrations # 0.000 /sec
343 page-faults # 143.560 K/sec ( +- 0.01% )
7,020,982 instructions # 1.30 insn per cycle ( +- 0.52% )
5,397,266 cycles # 2.259 GHz ( +- 0.06% )
1,569,648 branches # 656.962 M/sec ( +- 0.08% )
25,419 branch-misses # 1.62% of all branches ( +- 0.72% )
0.00299996 +- 0.00000206 seconds time elapsed ( +- 0.07% )
Relative speed up in time elapsed is around 29%.
[ tglx: Fixed it up so it applies to current mainline ]
Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Link: https://patch.msgid.link/aQj5mGZ6_BBlAm3B@shell.ilvokhin.com
---
Changes v2:
- Expanded commit message: add more rationale for the proposed change.
- Renamed helper put_spaced_decimal() -> put_decimal() primarily to make
checkpatch.pl --strict pass.
arch/x86/kernel/irq.c | 112 ++++++++++++++++++++++++++------------------------
1 file changed, 59 insertions(+), 53 deletions(-)
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -62,6 +62,18 @@ void ack_bad_irq(unsigned int irq)
apic_eoi();
}
+/*
+ * A helper routine for putting space and decimal number without overhead
+ * from rich format of printf().
+ */
+static void put_decimal(struct seq_file *p, unsigned long long num)
+{
+ const char *delimiter = " ";
+ unsigned int width = 10;
+
+ seq_put_decimal_ull_width(p, delimiter, num, width);
+}
+
#define irq_stats(x) (&per_cpu(irq_stat, x))
/*
* /proc/interrupts printing for arch specific interrupts
@@ -70,103 +82,101 @@ int arch_show_interrupts(struct seq_file
{
int j;
- seq_printf(p, "%*s: ", prec, "NMI");
+ seq_printf(p, "%*s:", prec, "NMI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
+ put_decimal(p, irq_stats(j)->__nmi_count);
seq_puts(p, " Non-maskable interrupts\n");
#ifdef CONFIG_X86_LOCAL_APIC
- seq_printf(p, "%*s: ", prec, "LOC");
+ seq_printf(p, "%*s:", prec, "LOC");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
+ put_decimal(p, irq_stats(j)->apic_timer_irqs);
seq_puts(p, " Local timer interrupts\n");
- seq_printf(p, "%*s: ", prec, "SPU");
+ seq_printf(p, "%*s:", prec, "SPU");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
+ put_decimal(p, irq_stats(j)->irq_spurious_count);
seq_puts(p, " Spurious interrupts\n");
- seq_printf(p, "%*s: ", prec, "PMI");
+ seq_printf(p, "%*s:", prec, "PMI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
+ put_decimal(p, irq_stats(j)->apic_perf_irqs);
seq_puts(p, " Performance monitoring interrupts\n");
- seq_printf(p, "%*s: ", prec, "IWI");
+ seq_printf(p, "%*s:", prec, "IWI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
+ put_decimal(p, irq_stats(j)->apic_irq_work_irqs);
seq_puts(p, " IRQ work interrupts\n");
- seq_printf(p, "%*s: ", prec, "RTR");
+ seq_printf(p, "%*s:", prec, "RTR");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
+ put_decimal(p, irq_stats(j)->icr_read_retry_count);
seq_puts(p, " APIC ICR read retries\n");
if (x86_platform_ipi_callback) {
- seq_printf(p, "%*s: ", prec, "PLT");
+ seq_printf(p, "%*s:", prec, "PLT");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
+ put_decimal(p, irq_stats(j)->x86_platform_ipis);
seq_puts(p, " Platform interrupts\n");
}
#endif
#ifdef CONFIG_SMP
- seq_printf(p, "%*s: ", prec, "RES");
+ seq_printf(p, "%*s:", prec, "RES");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
+ put_decimal(p, irq_stats(j)->irq_resched_count);
seq_puts(p, " Rescheduling interrupts\n");
- seq_printf(p, "%*s: ", prec, "CAL");
+ seq_printf(p, "%*s:", prec, "CAL");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+ put_decimal(p, irq_stats(j)->irq_call_count);
seq_puts(p, " Function call interrupts\n");
- seq_printf(p, "%*s: ", prec, "TLB");
+ seq_printf(p, "%*s:", prec, "TLB");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
+ put_decimal(p, irq_stats(j)->irq_tlb_count);
seq_puts(p, " TLB shootdowns\n");
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
- seq_printf(p, "%*s: ", prec, "TRM");
+ seq_printf(p, "%*s:", prec, "TRM");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
+ put_decimal(p, irq_stats(j)->irq_thermal_count);
seq_puts(p, " Thermal event interrupts\n");
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
- seq_printf(p, "%*s: ", prec, "THR");
+ seq_printf(p, "%*s:", prec, "THR");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
+ put_decimal(p, irq_stats(j)->irq_threshold_count);
seq_puts(p, " Threshold APIC interrupts\n");
#endif
#ifdef CONFIG_X86_MCE_AMD
- seq_printf(p, "%*s: ", prec, "DFR");
+ seq_printf(p, "%*s:", prec, "DFR");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
+ put_decimal(p, irq_stats(j)->irq_deferred_error_count);
seq_puts(p, " Deferred Error APIC interrupts\n");
#endif
#ifdef CONFIG_X86_MCE
- seq_printf(p, "%*s: ", prec, "MCE");
+ seq_printf(p, "%*s:", prec, "MCE");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
+ put_decimal(p, per_cpu(mce_exception_count, j));
seq_puts(p, " Machine check exceptions\n");
- seq_printf(p, "%*s: ", prec, "MCP");
+ seq_printf(p, "%*s:", prec, "MCP");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
+ put_decimal(p, per_cpu(mce_poll_count, j));
seq_puts(p, " Machine check polls\n");
#endif
#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
- seq_printf(p, "%*s: ", prec, "HYP");
+ seq_printf(p, "%*s:", prec, "HYP");
for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- irq_stats(j)->irq_hv_callback_count);
+ put_decimal(p, irq_stats(j)->irq_hv_callback_count);
seq_puts(p, " Hypervisor callback interrupts\n");
}
#endif
#if IS_ENABLED(CONFIG_HYPERV)
if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
- seq_printf(p, "%*s: ", prec, "HRE");
+ seq_printf(p, "%*s:", prec, "HRE");
for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- irq_stats(j)->irq_hv_reenlightenment_count);
+ put_decimal(p,
+ irq_stats(j)->irq_hv_reenlightenment_count);
seq_puts(p, " Hyper-V reenlightenment interrupts\n");
}
if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) {
- seq_printf(p, "%*s: ", prec, "HVS");
+ seq_printf(p, "%*s:", prec, "HVS");
for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- irq_stats(j)->hyperv_stimer0_count);
+ put_decimal(p, irq_stats(j)->hyperv_stimer0_count);
seq_puts(p, " Hyper-V stimer0 interrupts\n");
}
#endif
@@ -175,35 +185,31 @@ int arch_show_interrupts(struct seq_file
seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
#endif
#if IS_ENABLED(CONFIG_KVM)
- seq_printf(p, "%*s: ", prec, "PIN");
+ seq_printf(p, "%*s:", prec, "PIN");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
+ put_decimal(p, irq_stats(j)->kvm_posted_intr_ipis);
seq_puts(p, " Posted-interrupt notification event\n");
- seq_printf(p, "%*s: ", prec, "NPI");
+ seq_printf(p, "%*s:", prec, "NPI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- irq_stats(j)->kvm_posted_intr_nested_ipis);
+ put_decimal(p, irq_stats(j)->kvm_posted_intr_nested_ipis);
seq_puts(p, " Nested posted-interrupt event\n");
- seq_printf(p, "%*s: ", prec, "PIW");
+ seq_printf(p, "%*s:", prec, "PIW");
for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- irq_stats(j)->kvm_posted_intr_wakeup_ipis);
+ put_decimal(p, irq_stats(j)->kvm_posted_intr_wakeup_ipis);
seq_puts(p, " Posted-interrupt wakeup event\n");
#endif
#ifdef CONFIG_GUEST_PERF_EVENTS
- seq_printf(p, "%*s: ", prec, "VPMI");
+ seq_printf(p, "%*s:", prec, "VPMI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- irq_stats(j)->perf_guest_mediated_pmis);
+ put_decimal(p, irq_stats(j)->perf_guest_mediated_pmis);
seq_puts(p, " Perf Guest Mediated PMI\n");
#endif
#ifdef CONFIG_X86_POSTED_MSI
- seq_printf(p, "%*s: ", prec, "PMN");
+ seq_printf(p, "%*s:", prec, "PMN");
for_each_online_cpu(j)
- seq_printf(p, "%10u ",
- irq_stats(j)->posted_msi_notification_count);
+ put_decimal(p, irq_stats(j)->posted_msi_notification_count);
seq_puts(p, " Posted MSI notification event\n");
#endif
return 0;
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 02/14] genirq/proc: Avoid formatting zero counts in /proc/interrupts
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
2026-03-20 13:21 ` [patch v2 01/14] x86/irq: Optimize interrupts decimals printing Thomas Gleixner
@ 2026-03-20 13:21 ` Thomas Gleixner
2026-03-21 16:38 ` Radu Rendec
2026-03-20 13:21 ` [patch v2 03/14] genirq/proc: Utilize irq_desc::tot_count to avoid evaluation Thomas Gleixner
` (12 subsequent siblings)
14 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:21 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
A large portion of interrupt count entries are zero. There is no point in
formatting the zero value as it is way cheaper to just emit a constant
string.
Collect the number of consecutive zero counts and emit them in one go
before a non-zero count and at the end of the line.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
V2: Use sizeof() for ZSTR1_LEN - Dmitry
---
include/linux/interrupt.h | 1 +
kernel/irq/proc.c | 42 +++++++++++++++++++++++++++++++++++++-----
2 files changed, 38 insertions(+), 5 deletions(-)
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -864,6 +864,7 @@ static inline void init_irq_proc(void)
struct seq_file;
int show_interrupts(struct seq_file *p, void *v);
int arch_show_interrupts(struct seq_file *p, int prec);
+void irq_proc_emit_counts(struct seq_file *p, unsigned int __percpu *cnts);
extern int early_irq_init(void);
extern int arch_probe_nr_irqs(void);
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -450,6 +450,42 @@ int __weak arch_show_interrupts(struct s
# define ACTUAL_NR_IRQS irq_get_nr_irqs()
#endif
+#define ZSTR1 " 0"
+#define ZSTR1_LEN (sizeof(ZSTR1) - 1)
+#define ZSTR16 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 \
+ ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1
+#define ZSTR256 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 \
+ ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16
+
+static inline void irq_proc_emit_zero_counts(struct seq_file *p, unsigned int zeros)
+{
+ if (!zeros)
+ return;
+
+ for (unsigned int n = min(zeros, 256); n; zeros -= n, n = min(zeros, 256))
+ seq_write(p, ZSTR256, n * ZSTR1_LEN);
+}
+
+static inline unsigned int irq_proc_emit_count(struct seq_file *p, unsigned int cnt,
+ unsigned int zeros)
+{
+ if (!cnt)
+ return zeros + 1;
+
+ irq_proc_emit_zero_counts(p, zeros);
+ seq_put_decimal_ull_width(p, " ", cnt, 10);
+ return 0;
+}
+
+void irq_proc_emit_counts(struct seq_file *p, unsigned int __percpu *cnts)
+{
+ unsigned int cpu, zeros = 0;
+
+ for_each_online_cpu(cpu)
+ zeros = irq_proc_emit_count(p, per_cpu(*cnts, cpu), zeros);
+ irq_proc_emit_zero_counts(p, zeros);
+}
+
int show_interrupts(struct seq_file *p, void *v)
{
const unsigned int nr_irqs = irq_get_nr_irqs();
@@ -485,11 +521,7 @@ int show_interrupts(struct seq_file *p,
return 0;
seq_printf(p, "%*d:", prec, i);
- for_each_online_cpu(j) {
- unsigned int cnt = desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, j) : 0;
-
- seq_put_decimal_ull_width(p, " ", cnt, 10);
- }
+ irq_proc_emit_counts(p, &desc->kstat_irqs->cnt);
seq_putc(p, ' ');
guard(raw_spinlock_irq)(&desc->lock);
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 03/14] genirq/proc: Utilize irq_desc::tot_count to avoid evaluation
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
2026-03-20 13:21 ` [patch v2 01/14] x86/irq: Optimize interrupts decimals printing Thomas Gleixner
2026-03-20 13:21 ` [patch v2 02/14] genirq/proc: Avoid formatting zero counts in /proc/interrupts Thomas Gleixner
@ 2026-03-20 13:21 ` Thomas Gleixner
2026-03-22 19:59 ` Radu Rendec
2026-03-20 13:21 ` [patch v2 04/14] x86/irq: Make irqstats array based Thomas Gleixner
` (11 subsequent siblings)
14 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:21 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
Interrupts which are not marked per CPU increment not only the per CPU
statistics, but also the accumulation counter irq_desc::tot_count.
Change the counter to type unsigned long so it does not produce sporadic
zeros due to wrap arounds on 64-bit machines and do a quick check for non
per CPU interrupts. If the counter is zero, then simply emit a full set of
zero strings. That spares the evaluation of the per CPU counters completely
for interrupts with zero events.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
include/linux/irqdesc.h | 6 +++---
kernel/irq/proc.c | 11 ++++++++++-
2 files changed, 13 insertions(+), 4 deletions(-)
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -52,8 +52,8 @@ struct irq_redirect {
* @depth: disable-depth, for nested irq_disable() calls
* @wake_depth: enable depth, for multiple irq_set_irq_wake() callers
* @tot_count: stats field for non-percpu irqs
- * @irq_count: stats field to detect stalled irqs
* @last_unhandled: aging timer for unhandled count
+ * @irq_count: stats field to detect stalled irqs
* @irqs_unhandled: stats field for spurious unhandled interrupts
* @threads_handled: stats field for deferred spurious detection of threaded handlers
* @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
@@ -87,9 +87,9 @@ struct irq_desc {
unsigned int core_internal_state__do_not_mess_with_it;
unsigned int depth; /* nested irq disables */
unsigned int wake_depth; /* nested wake enables */
- unsigned int tot_count;
- unsigned int irq_count; /* For detecting broken IRQs */
+ unsigned long tot_count;
unsigned long last_unhandled; /* Aging timer for unhandled count */
+ unsigned int irq_count; /* For detecting broken IRQs */
unsigned int irqs_unhandled;
atomic_t threads_handled;
int threads_handled_last;
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -521,7 +521,16 @@ int show_interrupts(struct seq_file *p,
return 0;
seq_printf(p, "%*d:", prec, i);
- irq_proc_emit_counts(p, &desc->kstat_irqs->cnt);
+
+ /*
+ * Always output per CPU interrupts. Output device interrupts only when
+ * desc::tot_count is not zero.
+ */
+ if (irq_settings_is_per_cpu(desc) || irq_settings_is_per_cpu_devid(desc) ||
+ data_race(desc->tot_count))
+ irq_proc_emit_counts(p, &desc->kstat_irqs->cnt);
+ else
+ irq_proc_emit_zero_counts(p, num_online_cpus());
seq_putc(p, ' ');
guard(raw_spinlock_irq)(&desc->lock);
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (2 preceding siblings ...)
2026-03-20 13:21 ` [patch v2 03/14] genirq/proc: Utilize irq_desc::tot_count to avoid evaluation Thomas Gleixner
@ 2026-03-20 13:21 ` Thomas Gleixner
2026-03-20 16:39 ` Michael Kelley
2026-03-23 19:24 ` Radu Rendec
2026-03-20 13:21 ` [patch v2 05/14] genirq: Expose nr_irqs in core code Thomas Gleixner
` (10 subsequent siblings)
14 siblings, 2 replies; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:21 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
Having the x86 specific interrupt statistics as a data structure with
individual members instead of an array is just stupid as it requires
endless copy and paste in arch_show_interrupts() and arch_irq_stat_cpu(),
where the latter does not even take the latest interrupt additions into
account. The resulting #ifdef orgy is just disgusting.
Convert it to an array of counters, which does not make a difference in the
actual interrupt hotpath increment as the array index is constant and
therefore not any different than the member based access.
But in arch_show_interrupts() and arch_irq_stat_cpu() this just turns into
a loop, which reduces the text size by ~2k (~12%):
text data bss dec hex filename
19643 15250 904 35797 8bd5 ../build/arch/x86/kernel/irq.o
17355 15250 904 33509 82e5 ../build/arch/x86/kernel/irq.o
Adding a new vector or software counter only requires to update the table
and everything just works. Using the core provided emit function which
speeds up 0 outputs makes it significantly faster.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
V2: Simplified and extended vector skip mechanism
Fixed up the typos - Michael, Dmitry
Added the lost precision back for ERR/MIS - Dmitry
---
arch/x86/events/amd/core.c | 2
arch/x86/events/amd/ibs.c | 2
arch/x86/events/core.c | 2
arch/x86/events/intel/core.c | 2
arch/x86/events/intel/knc.c | 2
arch/x86/events/intel/p4.c | 2
arch/x86/events/zhaoxin/core.c | 2
arch/x86/hyperv/hv_init.c | 2
arch/x86/include/asm/hardirq.h | 69 +++++-----
arch/x86/include/asm/irq.h | 2
arch/x86/include/asm/mce.h | 3
arch/x86/kernel/apic/apic.c | 4
arch/x86/kernel/apic/ipi.c | 2
arch/x86/kernel/cpu/acrn.c | 2
arch/x86/kernel/cpu/mce/amd.c | 2
arch/x86/kernel/cpu/mce/core.c | 8 -
arch/x86/kernel/cpu/mce/threshold.c | 2
arch/x86/kernel/cpu/mshyperv.c | 4
arch/x86/kernel/irq.c | 247 ++++++++++++------------------------
arch/x86/kernel/irq_work.c | 2
arch/x86/kernel/irqinit.c | 2
arch/x86/kernel/kvm.c | 2
arch/x86/kernel/nmi.c | 4
arch/x86/kernel/smp.c | 6
arch/x86/mm/tlb.c | 2
arch/x86/xen/enlighten_hvm.c | 2
arch/x86/xen/enlighten_pv.c | 2
arch/x86/xen/smp.c | 6
arch/x86/xen/smp_pv.c | 2
29 files changed, 158 insertions(+), 233 deletions(-)
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -1032,7 +1032,7 @@ static int amd_pmu_v2_handle_irq(struct
* Unmasking the LVTPC is not required as the Mask (M) bit of the LVT
* PMI entry is not set by the local APIC when a PMC overflow occurs
*/
- inc_irq_stat(apic_perf_irqs);
+ inc_irq_stat(APIC_PERF);
done:
cpuc->enabled = pmu_enabled;
--- a/arch/x86/events/amd/ibs.c
+++ b/arch/x86/events/amd/ibs.c
@@ -1403,7 +1403,7 @@ perf_ibs_nmi_handler(unsigned int cmd, s
handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
if (handled)
- inc_irq_stat(apic_perf_irqs);
+ inc_irq_stat(APIC_PERF);
perf_sample_event_took(sched_clock() - stamp);
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1747,7 +1747,7 @@ int x86_pmu_handle_irq(struct pt_regs *r
}
if (handled)
- inc_irq_stat(apic_perf_irqs);
+ inc_irq_stat(APIC_PERF);
return handled;
}
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3504,7 +3504,7 @@ static int handle_pmi_common(struct pt_r
int bit;
int handled = 0;
- inc_irq_stat(apic_perf_irqs);
+ inc_irq_stat(APIC_PERF);
/*
* Ignore a range of extra bits in status that do not indicate
--- a/arch/x86/events/intel/knc.c
+++ b/arch/x86/events/intel/knc.c
@@ -238,7 +238,7 @@ static int knc_pmu_handle_irq(struct pt_
goto done;
}
- inc_irq_stat(apic_perf_irqs);
+ inc_irq_stat(APIC_PERF);
for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
struct perf_event *event = cpuc->events[bit];
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -1077,7 +1077,7 @@ static int p4_pmu_handle_irq(struct pt_r
}
if (handled)
- inc_irq_stat(apic_perf_irqs);
+ inc_irq_stat(APIC_PERF);
/*
* When dealing with the unmasking of the LVTPC on P4 perf hw, it has
--- a/arch/x86/events/zhaoxin/core.c
+++ b/arch/x86/events/zhaoxin/core.c
@@ -373,7 +373,7 @@ static int zhaoxin_pmu_handle_irq(struct
else
zhaoxin_pmu_ack_status(status);
- inc_irq_stat(apic_perf_irqs);
+ inc_irq_stat(APIC_PERF);
/*
* CondChgd bit 63 doesn't mean any overflow status. Ignore
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -219,7 +219,7 @@ static inline bool hv_reenlightenment_av
DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_reenlightenment)
{
apic_eoi();
- inc_irq_stat(irq_hv_reenlightenment_count);
+ inc_irq_stat(HYPERV_REENLIGHTENMENT);
schedule_delayed_work(&hv_reenlightenment_work, HZ/10);
}
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -4,51 +4,60 @@
#include <linux/threads.h>
-typedef struct {
-#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
- u8 kvm_cpu_l1tf_flush_l1d;
-#endif
- unsigned int __nmi_count; /* arch dependent */
+enum {
+ IRQ_COUNT_NMI,
#ifdef CONFIG_X86_LOCAL_APIC
- unsigned int apic_timer_irqs; /* arch dependent */
- unsigned int irq_spurious_count;
- unsigned int icr_read_retry_count;
+ IRQ_COUNT_APIC_TIMER,
+ IRQ_COUNT_SPURIOUS,
+ IRQ_COUNT_APIC_PERF,
+ IRQ_COUNT_IRQ_WORK,
+ IRQ_COUNT_ICR_READ_RETRY,
+ IRQ_COUNT_X86_PLATFORM_IPI,
#endif
-#if IS_ENABLED(CONFIG_KVM)
- unsigned int kvm_posted_intr_ipis;
- unsigned int kvm_posted_intr_wakeup_ipis;
- unsigned int kvm_posted_intr_nested_ipis;
-#endif
-#ifdef CONFIG_GUEST_PERF_EVENTS
- unsigned int perf_guest_mediated_pmis;
-#endif
- unsigned int x86_platform_ipis; /* arch dependent */
- unsigned int apic_perf_irqs;
- unsigned int apic_irq_work_irqs;
#ifdef CONFIG_SMP
- unsigned int irq_resched_count;
- unsigned int irq_call_count;
+ IRQ_COUNT_RESCHEDULE,
+ IRQ_COUNT_CALL_FUNCTION,
#endif
- unsigned int irq_tlb_count;
+ IRQ_COUNT_TLB,
#ifdef CONFIG_X86_THERMAL_VECTOR
- unsigned int irq_thermal_count;
+ IRQ_COUNT_THERMAL_APIC,
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
- unsigned int irq_threshold_count;
+ IRQ_COUNT_THRESHOLD_APIC,
#endif
#ifdef CONFIG_X86_MCE_AMD
- unsigned int irq_deferred_error_count;
+ IRQ_COUNT_DEFERRED_ERROR,
+#endif
+#ifdef CONFIG_X86_MCE
+ IRQ_COUNT_MCE_EXCEPTION,
+ IRQ_COUNT_MCE_POLL,
#endif
#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
- unsigned int irq_hv_callback_count;
+ IRQ_COUNT_HYPERVISOR_CALLBACK,
#endif
#if IS_ENABLED(CONFIG_HYPERV)
- unsigned int irq_hv_reenlightenment_count;
- unsigned int hyperv_stimer0_count;
+ IRQ_COUNT_HYPERV_REENLIGHTENMENT,
+ IRQ_COUNT_HYPERV_STIMER0,
+#endif
+#if IS_ENABLED(CONFIG_KVM)
+ IRQ_COUNT_POSTED_INTR,
+ IRQ_COUNT_POSTED_INTR_NESTED,
+ IRQ_COUNT_POSTED_INTR_WAKEUP,
+#endif
+#ifdef CONFIG_GUEST_PERF_EVENTS
+ IRQ_COUNT_PERF_GUEST_MEDIATED_PMI,
#endif
#ifdef CONFIG_X86_POSTED_MSI
- unsigned int posted_msi_notification_count;
+ IRQ_COUNT_POSTED_MSI_NOTIFICATION,
+#endif
+ IRQ_COUNT_MAX,
+};
+
+typedef struct {
+#if IS_ENABLED(CONFIG_CPU_MITIGATIONS) && IS_ENABLED(CONFIG_KVM_INTEL)
+ u8 kvm_cpu_l1tf_flush_l1d;
#endif
+ unsigned int counts[IRQ_COUNT_MAX];
} ____cacheline_aligned irq_cpustat_t;
DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
@@ -58,7 +67,7 @@ DECLARE_PER_CPU_ALIGNED(struct pi_desc,
#endif
#define __ARCH_IRQ_STAT
-#define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
+#define inc_irq_stat(index) this_cpu_inc(irq_stat.counts[IRQ_COUNT_##index])
extern void ack_bad_irq(unsigned int irq);
--- a/arch/x86/include/asm/irq.h
+++ b/arch/x86/include/asm/irq.h
@@ -47,4 +47,6 @@ void arch_trigger_cpumask_backtrace(cons
#define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
#endif
+void irq_init_stats(void);
+
#endif /* _ASM_X86_IRQ_H */
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -291,9 +291,6 @@ bool mce_is_memory_error(struct mce *m);
bool mce_is_correctable(struct mce *m);
bool mce_usable_address(struct mce *m);
-DECLARE_PER_CPU(unsigned, mce_exception_count);
-DECLARE_PER_CPU(unsigned, mce_poll_count);
-
typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1040,7 +1040,7 @@ static void local_apic_timer_interrupt(v
/*
* the NMI deadlock-detector uses this.
*/
- inc_irq_stat(apic_timer_irqs);
+ inc_irq_stat(APIC_TIMER);
evt->event_handler(evt);
}
@@ -2108,7 +2108,7 @@ static noinline void handle_spurious_int
trace_spurious_apic_entry(vector);
- inc_irq_stat(irq_spurious_count);
+ inc_irq_stat(SPURIOUS);
/*
* If this is a spurious interrupt then do not acknowledge
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -120,7 +120,7 @@ u32 apic_mem_wait_icr_idle_timeout(void)
for (cnt = 0; cnt < 1000; cnt++) {
if (!(apic_read(APIC_ICR) & APIC_ICR_BUSY))
return 0;
- inc_irq_stat(icr_read_retry_count);
+ inc_irq_stat(ICR_READ_RETRY);
udelay(100);
}
return APIC_ICR_BUSY;
--- a/arch/x86/kernel/cpu/acrn.c
+++ b/arch/x86/kernel/cpu/acrn.c
@@ -52,7 +52,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_acrn_hv_ca
* HYPERVISOR_CALLBACK_VECTOR.
*/
apic_eoi();
- inc_irq_stat(irq_hv_callback_count);
+ inc_irq_stat(HYPERVISOR_CALLBACK);
if (acrn_intr_handler)
acrn_intr_handler();
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -840,7 +840,7 @@ bool amd_mce_usable_address(struct mce *
DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
{
trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
- inc_irq_stat(irq_deferred_error_count);
+ inc_irq_stat(DEFERRED_ERROR);
deferred_error_int_vector();
trace_deferred_error_apic_exit(DEFERRED_ERROR_VECTOR);
apic_eoi();
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -67,8 +67,6 @@ static DEFINE_MUTEX(mce_sysfs_mutex);
#define SPINUNIT 100 /* 100ns */
-DEFINE_PER_CPU(unsigned, mce_exception_count);
-
DEFINE_PER_CPU_READ_MOSTLY(unsigned int, mce_num_banks);
DEFINE_PER_CPU_READ_MOSTLY(struct mce_bank[MAX_NR_BANKS], mce_banks_array);
@@ -716,8 +714,6 @@ static noinstr void mce_read_aux(struct
}
}
-DEFINE_PER_CPU(unsigned, mce_poll_count);
-
/*
* We have three scenarios for checking for Deferred errors:
*
@@ -820,7 +816,7 @@ void machine_check_poll(enum mcp_flags f
struct mce *m;
int i;
- this_cpu_inc(mce_poll_count);
+ inc_irq_stat(MCE_POLL);
mce_gather_info(&err, NULL);
m = &err.m;
@@ -1595,7 +1591,7 @@ noinstr void do_machine_check(struct pt_
*/
lmce = 1;
- this_cpu_inc(mce_exception_count);
+ inc_irq_stat(MCE_EXCEPTION);
mce_gather_info(&err, regs);
m = &err.m;
--- a/arch/x86/kernel/cpu/mce/threshold.c
+++ b/arch/x86/kernel/cpu/mce/threshold.c
@@ -37,7 +37,7 @@ void (*mce_threshold_vector)(void) = def
DEFINE_IDTENTRY_SYSVEC(sysvec_threshold)
{
trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR);
- inc_irq_stat(irq_threshold_count);
+ inc_irq_stat(THRESHOLD_APIC);
mce_threshold_vector();
trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR);
apic_eoi();
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -154,7 +154,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_cal
{
struct pt_regs *old_regs = set_irq_regs(regs);
- inc_irq_stat(irq_hv_callback_count);
+ inc_irq_stat(HYPERVISOR_CALLBACK);
if (mshv_handler)
mshv_handler();
@@ -191,7 +191,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_sti
{
struct pt_regs *old_regs = set_irq_regs(regs);
- inc_irq_stat(hyperv_stimer0_count);
+ inc_irq_stat(HYPERV_STIMER0);
if (hv_stimer0_handler)
hv_stimer0_handler();
add_interrupt_randomness(HYPERV_STIMER0_VECTOR);
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -62,156 +62,102 @@ void ack_bad_irq(unsigned int irq)
apic_eoi();
}
-/*
- * A helper routine for putting space and decimal number without overhead
- * from rich format of printf().
- */
-static void put_decimal(struct seq_file *p, unsigned long long num)
-{
- const char *delimiter = " ";
- unsigned int width = 10;
+struct irq_stat_info {
+ unsigned int skip_vector;
+ const char *symbol;
+ const char *text;
+};
- seq_put_decimal_ull_width(p, delimiter, num, width);
-}
+#define ISS(idx, sym, txt) [IRQ_COUNT_##idx] = { .symbol = sym, .text = txt }
-#define irq_stats(x) (&per_cpu(irq_stat, x))
-/*
- * /proc/interrupts printing for arch specific interrupts
- */
-int arch_show_interrupts(struct seq_file *p, int prec)
-{
- int j;
+#define ITS(idx, sym, txt) [IRQ_COUNT_##idx] = \
+ { .skip_vector = idx## _VECTOR, .symbol = sym, .text = txt }
- seq_printf(p, "%*s:", prec, "NMI");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->__nmi_count);
- seq_puts(p, " Non-maskable interrupts\n");
+static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = {
+ ISS(NMI, "NMI", " Non-maskable interrupts\n"),
#ifdef CONFIG_X86_LOCAL_APIC
- seq_printf(p, "%*s:", prec, "LOC");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->apic_timer_irqs);
- seq_puts(p, " Local timer interrupts\n");
-
- seq_printf(p, "%*s:", prec, "SPU");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->irq_spurious_count);
- seq_puts(p, " Spurious interrupts\n");
- seq_printf(p, "%*s:", prec, "PMI");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->apic_perf_irqs);
- seq_puts(p, " Performance monitoring interrupts\n");
- seq_printf(p, "%*s:", prec, "IWI");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->apic_irq_work_irqs);
- seq_puts(p, " IRQ work interrupts\n");
- seq_printf(p, "%*s:", prec, "RTR");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->icr_read_retry_count);
- seq_puts(p, " APIC ICR read retries\n");
- if (x86_platform_ipi_callback) {
- seq_printf(p, "%*s:", prec, "PLT");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->x86_platform_ipis);
- seq_puts(p, " Platform interrupts\n");
- }
+ ISS(APIC_TIMER, "LOC", " Local timer interrupts\n"),
+ ISS(SPURIOUS, "SPU", " Spurious interrupts\n"),
+ ISS(APIC_PERF, "PMI", " Performance monitoring interrupts\n"),
+ ISS(IRQ_WORK, "IWI", " IRQ work interrupts\n"),
+ ISS(ICR_READ_RETRY, "RTR", " APIC ICR read retries\n"),
+ ISS(X86_PLATFORM_IPI, "PLT", " Platform interrupts\n"),
#endif
#ifdef CONFIG_SMP
- seq_printf(p, "%*s:", prec, "RES");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->irq_resched_count);
- seq_puts(p, " Rescheduling interrupts\n");
- seq_printf(p, "%*s:", prec, "CAL");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->irq_call_count);
- seq_puts(p, " Function call interrupts\n");
- seq_printf(p, "%*s:", prec, "TLB");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->irq_tlb_count);
- seq_puts(p, " TLB shootdowns\n");
+ ISS(RESCHEDULE, "RES", " Rescheduling interrupts\n"),
+ ISS(CALL_FUNCTION, "CAL", " Function call interrupts\n"),
#endif
+ ISS(TLB, "TLB", " TLB shootdowns\n"),
#ifdef CONFIG_X86_THERMAL_VECTOR
- seq_printf(p, "%*s:", prec, "TRM");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->irq_thermal_count);
- seq_puts(p, " Thermal event interrupts\n");
+ ISS(THERMAL_APIC, "TRM", " Thermal event interrupts\n"),
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
- seq_printf(p, "%*s:", prec, "THR");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->irq_threshold_count);
- seq_puts(p, " Threshold APIC interrupts\n");
+ ISS(THRESHOLD_APIC, "THR", " Threshold APIC interrupts\n"),
#endif
#ifdef CONFIG_X86_MCE_AMD
- seq_printf(p, "%*s:", prec, "DFR");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->irq_deferred_error_count);
- seq_puts(p, " Deferred Error APIC interrupts\n");
+ ISS(DEFERRED_ERROR, "DFR", " Deferred Error APIC interrupts\n"),
#endif
#ifdef CONFIG_X86_MCE
- seq_printf(p, "%*s:", prec, "MCE");
- for_each_online_cpu(j)
- put_decimal(p, per_cpu(mce_exception_count, j));
- seq_puts(p, " Machine check exceptions\n");
- seq_printf(p, "%*s:", prec, "MCP");
- for_each_online_cpu(j)
- put_decimal(p, per_cpu(mce_poll_count, j));
- seq_puts(p, " Machine check polls\n");
+ ISS(MCE_EXCEPTION, "MCE", " Machine check exceptions\n"),
+ ISS(MCE_POLL, "MCP", " Machine check polls\n"),
#endif
#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
- if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
- seq_printf(p, "%*s:", prec, "HYP");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->irq_hv_callback_count);
- seq_puts(p, " Hypervisor callback interrupts\n");
- }
+ ITS(HYPERVISOR_CALLBACK, "HYP", " Hypervisor callback interrupts\n"),
#endif
#if IS_ENABLED(CONFIG_HYPERV)
- if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
- seq_printf(p, "%*s:", prec, "HRE");
- for_each_online_cpu(j)
- put_decimal(p,
- irq_stats(j)->irq_hv_reenlightenment_count);
- seq_puts(p, " Hyper-V reenlightenment interrupts\n");
- }
- if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) {
- seq_printf(p, "%*s:", prec, "HVS");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->hyperv_stimer0_count);
- seq_puts(p, " Hyper-V stimer0 interrupts\n");
- }
-#endif
- seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
-#if defined(CONFIG_X86_IO_APIC)
- seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
+ ITS(HYPERV_REENLIGHTENMENT, "HRE", " Hyper-V reenlightenment interrupts\n"),
+ ITS(HYPERV_STIMER0, "HVS", " Hyper-V stimer0 interrupts\n"),
#endif
#if IS_ENABLED(CONFIG_KVM)
- seq_printf(p, "%*s:", prec, "PIN");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->kvm_posted_intr_ipis);
- seq_puts(p, " Posted-interrupt notification event\n");
-
- seq_printf(p, "%*s:", prec, "NPI");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->kvm_posted_intr_nested_ipis);
- seq_puts(p, " Nested posted-interrupt event\n");
-
- seq_printf(p, "%*s:", prec, "PIW");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->kvm_posted_intr_wakeup_ipis);
- seq_puts(p, " Posted-interrupt wakeup event\n");
+ ITS(POSTED_INTR, "PIN", " Posted-interrupt notification event\n"),
+ ITS(POSTED_INTR_NESTED, "NPI", " Nested posted-interrupt event\n"),
+ ITS(POSTED_INTR_WAKEUP, "PIW", " Posted-interrupt wakeup event\n"),
#endif
#ifdef CONFIG_GUEST_PERF_EVENTS
- seq_printf(p, "%*s:", prec, "VPMI");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->perf_guest_mediated_pmis);
- seq_puts(p, " Perf Guest Mediated PMI\n");
+ ISS(PERF_GUEST_MEDIATED_PMI, "VPMI", " Perf Guest Mediated PMI\n"),
#endif
#ifdef CONFIG_X86_POSTED_MSI
- seq_printf(p, "%*s:", prec, "PMN");
- for_each_online_cpu(j)
- put_decimal(p, irq_stats(j)->posted_msi_notification_count);
- seq_puts(p, " Posted MSI notification event\n");
+ ISS(POSTED_MSI_NOTIFICATION, "PMN", " Posted MSI notification event\n"),
#endif
+};
+
+void __init irq_init_stats(void)
+{
+ struct irq_stat_info *info = irq_stat_info;
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
+ if (info->skip_vector && test_bit(info->skip_vector, system_vectors))
+ info->skip_vector = 0;
+ }
+
+ if (!x86_platform_ipi_callback)
+ irq_stat_info[IRQ_COUNT_X86_PLATFORM_IPI].skip_vector = 1;
+
+#ifdef CONFIG_X86_POSTED_MSI
+ if (!posted_msi_enabled())
+ irq_stat_info[IRQ_COUNT_POSTED_MSI_NOTIFICATION].skip_vector = 1;
+#endif
+}
+
+/*
+ * /proc/interrupts printing for arch specific interrupts
+ */
+int arch_show_interrupts(struct seq_file *p, int prec)
+{
+ const struct irq_stat_info *info = irq_stat_info;
+
+ for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) {
+ if (info->skip_vector)
+ continue;
+
+ seq_printf(p, "%*s:", prec, info->symbol);
+ irq_proc_emit_counts(p, &irq_stat.counts[i]);
+ seq_puts(p, info->text);
+ }
+
+ seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
+ if (IS_ENABLED(CONFIG_X86_IO_APIC))
+ seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
return 0;
}
@@ -220,38 +166,11 @@ int arch_show_interrupts(struct seq_file
*/
u64 arch_irq_stat_cpu(unsigned int cpu)
{
- u64 sum = irq_stats(cpu)->__nmi_count;
+ irq_cpustat_t *p = per_cpu_ptr(&irq_stat, cpu);
+ u64 sum = 0;
-#ifdef CONFIG_X86_LOCAL_APIC
- sum += irq_stats(cpu)->apic_timer_irqs;
- sum += irq_stats(cpu)->irq_spurious_count;
- sum += irq_stats(cpu)->apic_perf_irqs;
- sum += irq_stats(cpu)->apic_irq_work_irqs;
- sum += irq_stats(cpu)->icr_read_retry_count;
- if (x86_platform_ipi_callback)
- sum += irq_stats(cpu)->x86_platform_ipis;
-#endif
-#ifdef CONFIG_SMP
- sum += irq_stats(cpu)->irq_resched_count;
- sum += irq_stats(cpu)->irq_call_count;
-#endif
-#ifdef CONFIG_X86_THERMAL_VECTOR
- sum += irq_stats(cpu)->irq_thermal_count;
-#endif
-#ifdef CONFIG_X86_MCE_THRESHOLD
- sum += irq_stats(cpu)->irq_threshold_count;
-#endif
-#ifdef CONFIG_X86_HV_CALLBACK_VECTOR
- sum += irq_stats(cpu)->irq_hv_callback_count;
-#endif
-#if IS_ENABLED(CONFIG_HYPERV)
- sum += irq_stats(cpu)->irq_hv_reenlightenment_count;
- sum += irq_stats(cpu)->hyperv_stimer0_count;
-#endif
-#ifdef CONFIG_X86_MCE
- sum += per_cpu(mce_exception_count, cpu);
- sum += per_cpu(mce_poll_count, cpu);
-#endif
+ for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++)
+ sum += p->counts[i];
return sum;
}
@@ -344,7 +263,7 @@ DEFINE_IDTENTRY_IRQ(common_interrupt)
#ifdef CONFIG_X86_LOCAL_APIC
/* Function pointer for generic interrupt vector handling */
-void (*x86_platform_ipi_callback)(void) = NULL;
+void (*x86_platform_ipi_callback)(void) __ro_after_init = NULL;
/*
* Handler for X86_PLATFORM_IPI_VECTOR.
*/
@@ -354,7 +273,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platfo
apic_eoi();
trace_x86_platform_ipi_entry(X86_PLATFORM_IPI_VECTOR);
- inc_irq_stat(x86_platform_ipis);
+ inc_irq_stat(X86_PLATFORM_IPI);
if (x86_platform_ipi_callback)
x86_platform_ipi_callback();
trace_x86_platform_ipi_exit(X86_PLATFORM_IPI_VECTOR);
@@ -369,7 +288,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_x86_platfo
DEFINE_IDTENTRY_SYSVEC(sysvec_perf_guest_mediated_pmi_handler)
{
apic_eoi();
- inc_irq_stat(perf_guest_mediated_pmis);
+ inc_irq_stat(PERF_GUEST_MEDIATED_PMI);
perf_guest_handle_mediated_pmi();
}
#endif
@@ -395,7 +314,7 @@ EXPORT_SYMBOL_FOR_KVM(kvm_set_posted_int
DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_ipi)
{
apic_eoi();
- inc_irq_stat(kvm_posted_intr_ipis);
+ inc_irq_stat(POSTED_INTR);
}
/*
@@ -404,7 +323,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm
DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted_intr_wakeup_ipi)
{
apic_eoi();
- inc_irq_stat(kvm_posted_intr_wakeup_ipis);
+ inc_irq_stat(POSTED_INTR_WAKEUP);
kvm_posted_intr_wakeup_handler();
}
@@ -414,7 +333,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_posted
DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_kvm_posted_intr_nested_ipi)
{
apic_eoi();
- inc_irq_stat(kvm_posted_intr_nested_ipis);
+ inc_irq_stat(POSTED_INTR_NESTED);
}
#endif
@@ -488,7 +407,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_posted_msi
/* Mark the handler active for intel_ack_posted_msi_irq() */
__this_cpu_write(posted_msi_handler_active, true);
- inc_irq_stat(posted_msi_notification_count);
+ inc_irq_stat(POSTED_MSI_NOTIFICATION);
irq_enter();
/*
@@ -583,7 +502,7 @@ static void smp_thermal_vector(void)
DEFINE_IDTENTRY_SYSVEC(sysvec_thermal)
{
trace_thermal_apic_entry(THERMAL_APIC_VECTOR);
- inc_irq_stat(irq_thermal_count);
+ inc_irq_stat(THERMAL_APIC);
smp_thermal_vector();
trace_thermal_apic_exit(THERMAL_APIC_VECTOR);
apic_eoi();
--- a/arch/x86/kernel/irq_work.c
+++ b/arch/x86/kernel/irq_work.c
@@ -18,7 +18,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_irq_work)
{
apic_eoi();
trace_irq_work_entry(IRQ_WORK_VECTOR);
- inc_irq_stat(apic_irq_work_irqs);
+ inc_irq_stat(IRQ_WORK);
irq_work_run();
trace_irq_work_exit(IRQ_WORK_VECTOR);
}
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -104,6 +104,8 @@ void __init native_init_IRQ(void)
if (!cpu_feature_enabled(X86_FEATURE_FRED))
idt_setup_apic_and_irq_gates();
+ irq_init_stats();
+
lapic_assign_system_vectors();
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs()) {
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -310,7 +310,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncp
apic_eoi();
- inc_irq_stat(irq_hv_callback_count);
+ inc_irq_stat(HYPERVISOR_CALLBACK);
if (__this_cpu_read(async_pf_enabled)) {
token = __this_cpu_read(apf_reason.token);
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -576,7 +576,7 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
irq_state = irqentry_nmi_enter(regs);
- inc_irq_stat(__nmi_count);
+ inc_irq_stat(NMI);
if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
@@ -725,7 +725,7 @@ DEFINE_FREDENTRY_NMI(exc_nmi)
irq_state = irqentry_nmi_enter(regs);
- inc_irq_stat(__nmi_count);
+ inc_irq_stat(NMI);
default_do_nmi(regs);
irqentry_nmi_exit(regs, irq_state);
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -249,7 +249,7 @@ DEFINE_IDTENTRY_SYSVEC_SIMPLE(sysvec_res
{
apic_eoi();
trace_reschedule_entry(RESCHEDULE_VECTOR);
- inc_irq_stat(irq_resched_count);
+ inc_irq_stat(RESCHEDULE);
scheduler_ipi();
trace_reschedule_exit(RESCHEDULE_VECTOR);
}
@@ -258,7 +258,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_call_funct
{
apic_eoi();
trace_call_function_entry(CALL_FUNCTION_VECTOR);
- inc_irq_stat(irq_call_count);
+ inc_irq_stat(CALL_FUNCTION);
generic_smp_call_function_interrupt();
trace_call_function_exit(CALL_FUNCTION_VECTOR);
}
@@ -267,7 +267,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_call_funct
{
apic_eoi();
trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR);
- inc_irq_stat(irq_call_count);
+ inc_irq_stat(CALL_FUNCTION);
generic_smp_call_function_single_interrupt();
trace_call_function_single_exit(CALL_FUNCTION_SINGLE_VECTOR);
}
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -1144,7 +1144,7 @@ static void flush_tlb_func(void *info)
VM_WARN_ON(!irqs_disabled());
if (!local) {
- inc_irq_stat(irq_tlb_count);
+ inc_irq_stat(TLB);
count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
}
--- a/arch/x86/xen/enlighten_hvm.c
+++ b/arch/x86/xen/enlighten_hvm.c
@@ -125,7 +125,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_xen_hvm_ca
if (xen_percpu_upcall)
apic_eoi();
- inc_irq_stat(irq_hv_callback_count);
+ inc_irq_stat(HYPERVISOR_CALLBACK);
xen_evtchn_do_upcall();
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -728,7 +728,7 @@ static void __xen_pv_evtchn_do_upcall(st
{
struct pt_regs *old_regs = set_irq_regs(regs);
- inc_irq_stat(irq_hv_callback_count);
+ inc_irq_stat(HYPERVISOR_CALLBACK);
xen_evtchn_do_upcall();
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -23,7 +23,7 @@ static irqreturn_t xen_call_function_sin
*/
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
- inc_irq_stat(irq_resched_count);
+ inc_irq_stat(RESCHEDULE);
scheduler_ipi();
return IRQ_HANDLED;
@@ -254,7 +254,7 @@ void xen_send_IPI_allbutself(int vector)
static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
generic_smp_call_function_interrupt();
- inc_irq_stat(irq_call_count);
+ inc_irq_stat(CALL_FUNCTION);
return IRQ_HANDLED;
}
@@ -262,7 +262,7 @@ static irqreturn_t xen_call_function_int
static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
{
generic_smp_call_function_single_interrupt();
- inc_irq_stat(irq_call_count);
+ inc_irq_stat(CALL_FUNCTION);
return IRQ_HANDLED;
}
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -400,7 +400,7 @@ static void xen_pv_stop_other_cpus(int w
static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
{
irq_work_run();
- inc_irq_stat(apic_irq_work_irqs);
+ inc_irq_stat(IRQ_WORK);
return IRQ_HANDLED;
}
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 05/14] genirq: Expose nr_irqs in core code
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (3 preceding siblings ...)
2026-03-20 13:21 ` [patch v2 04/14] x86/irq: Make irqstats array based Thomas Gleixner
@ 2026-03-20 13:21 ` Thomas Gleixner
2026-03-23 19:48 ` Radu Rendec
2026-03-20 13:21 ` [patch v2 06/14] genirq: Cache the condition for /proc/interrupts exposure Thomas Gleixner
` (9 subsequent siblings)
14 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:21 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
... to avoid function calls in the core code to retrieve the maximum number
of interrupts.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
kernel/irq/internals.h | 1 +
kernel/irq/irqdesc.c | 28 ++++++++++++++--------------
kernel/irq/proc.c | 2 +-
3 files changed, 16 insertions(+), 15 deletions(-)
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -21,6 +21,7 @@
extern bool noirqdebug;
extern int irq_poll_cpu;
+extern unsigned int total_nr_irqs;
extern struct irqaction chained_action;
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -140,14 +140,14 @@ static void desc_set_defaults(unsigned i
desc_smp_init(desc, node, affinity);
}
-static unsigned int nr_irqs = NR_IRQS;
+unsigned int total_nr_irqs __read_mostly = NR_IRQS;
/**
* irq_get_nr_irqs() - Number of interrupts supported by the system.
*/
unsigned int irq_get_nr_irqs(void)
{
- return nr_irqs;
+ return total_nr_irqs;
}
EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
@@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
*/
unsigned int irq_set_nr_irqs(unsigned int nr)
{
- nr_irqs = nr;
+ total_nr_irqs = nr;
return nr;
}
@@ -187,9 +187,9 @@ static unsigned int irq_find_at_or_after
struct irq_desc *desc;
guard(rcu)();
- desc = mt_find(&sparse_irqs, &index, nr_irqs);
+ desc = mt_find(&sparse_irqs, &index, total_nr_irqs);
- return desc ? irq_desc_get_irq(desc) : nr_irqs;
+ return desc ? irq_desc_get_irq(desc) : total_nr_irqs;
}
static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
@@ -543,7 +543,7 @@ static bool irq_expand_nr_irqs(unsigned
{
if (nr > MAX_SPARSE_IRQS)
return false;
- nr_irqs = nr;
+ total_nr_irqs = nr;
return true;
}
@@ -557,16 +557,16 @@ int __init early_irq_init(void)
/* Let arch update nr_irqs and return the nr of preallocated irqs */
initcnt = arch_probe_nr_irqs();
printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n",
- NR_IRQS, nr_irqs, initcnt);
+ NR_IRQS, total_nr_irqs, initcnt);
- if (WARN_ON(nr_irqs > MAX_SPARSE_IRQS))
- nr_irqs = MAX_SPARSE_IRQS;
+ if (WARN_ON(total_nr_irqs > MAX_SPARSE_IRQS))
+ total_nr_irqs = MAX_SPARSE_IRQS;
if (WARN_ON(initcnt > MAX_SPARSE_IRQS))
initcnt = MAX_SPARSE_IRQS;
- if (initcnt > nr_irqs)
- nr_irqs = initcnt;
+ if (initcnt > total_nr_irqs)
+ total_nr_irqs = initcnt;
for (i = 0; i < initcnt; i++) {
desc = alloc_desc(i, node, 0, NULL, NULL);
@@ -862,7 +862,7 @@ void irq_free_descs(unsigned int from, u
{
int i;
- if (from >= nr_irqs || (from + cnt) > nr_irqs)
+ if (from >= total_nr_irqs || (from + cnt) > total_nr_irqs)
return;
guard(mutex)(&sparse_irq_lock);
@@ -911,7 +911,7 @@ int __ref __irq_alloc_descs(int irq, uns
if (irq >=0 && start != irq)
return -EEXIST;
- if (start + cnt > nr_irqs) {
+ if (start + cnt > total_nr_irqs) {
if (!irq_expand_nr_irqs(start + cnt))
return -ENOMEM;
}
@@ -923,7 +923,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs);
* irq_get_next_irq - get next allocated irq number
* @offset: where to start the search
*
- * Returns next irq number after offset or nr_irqs if none is found.
+ * Returns next irq number after offset or total_nr_irqs if none is found.
*/
unsigned int irq_get_next_irq(unsigned int offset)
{
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -447,7 +447,7 @@ int __weak arch_show_interrupts(struct s
}
#ifndef ACTUAL_NR_IRQS
-# define ACTUAL_NR_IRQS irq_get_nr_irqs()
+# define ACTUAL_NR_IRQS total_nr_irqs
#endif
#define ZSTR1 " 0"
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 06/14] genirq: Cache the condition for /proc/interrupts exposure
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (4 preceding siblings ...)
2026-03-20 13:21 ` [patch v2 05/14] genirq: Expose nr_irqs in core code Thomas Gleixner
@ 2026-03-20 13:21 ` Thomas Gleixner
2026-03-23 20:58 ` Radu Rendec
2026-03-20 13:21 ` [patch v2 07/14] genirq: Calculate precision only when required Thomas Gleixner
` (8 subsequent siblings)
14 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:21 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
show_interrupts() evaluates a boatload of conditions to establish whether
exposing an interrupt in /proc/interrupts or not.
That can be simplified by caching the condition in an internal status flag,
which is updated when one of the relevant inputs changes.
As a result the number of instructions and branches for reading
/proc/interrupts is reduced significantly.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
V2: s/IRQF_/IRQ/ and fixup the enum treatment - Dmitry
---
include/linux/irq.h | 1 +
kernel/irq/chip.c | 2 ++
kernel/irq/internals.h | 2 ++
kernel/irq/manage.c | 2 ++
kernel/irq/proc.c | 16 ++++++++++++----
kernel/irq/settings.h | 13 +++++++++++++
6 files changed, 32 insertions(+), 4 deletions(-)
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -99,6 +99,7 @@ enum {
IRQ_DISABLE_UNLAZY = (1 << 19),
IRQ_HIDDEN = (1 << 20),
IRQ_NO_DEBUG = (1 << 21),
+ IRQ_RESERVED = (1 << 22),
};
#define IRQF_MODIFY_MASK \
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -1004,6 +1004,7 @@ static void
WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc)));
irq_activate_and_startup(desc, IRQ_RESEND);
}
+ irq_proc_update_valid(desc);
}
void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
@@ -1064,6 +1065,7 @@ void irq_modify_status(unsigned int irq,
trigger = tmp;
irqd_set(&desc->irq_data, trigger);
+ irq_proc_update_valid(desc);
}
}
EXPORT_SYMBOL_GPL(irq_modify_status);
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -123,6 +123,7 @@ extern void register_irq_proc(unsigned i
extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc);
extern void register_handler_proc(unsigned int irq, struct irqaction *action);
extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
+void irq_proc_update_valid(struct irq_desc *desc);
#else
static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { }
static inline void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) { }
@@ -130,6 +131,7 @@ static inline void register_handler_proc
struct irqaction *action) { }
static inline void unregister_handler_proc(unsigned int irq,
struct irqaction *action) { }
+static inline void irq_proc_update_valid(struct irq_desc *desc) { }
#endif
extern bool irq_can_set_affinity_usr(unsigned int irq);
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1802,6 +1802,7 @@ static int
__enable_irq(desc);
}
+ irq_proc_update_valid(desc);
raw_spin_unlock_irqrestore(&desc->lock, flags);
chip_bus_sync_unlock(desc);
mutex_unlock(&desc->request_mutex);
@@ -1906,6 +1907,7 @@ static struct irqaction *__free_irq(stru
desc->affinity_hint = NULL;
#endif
+ irq_proc_update_valid(desc);
raw_spin_unlock_irqrestore(&desc->lock, flags);
/*
* Drop bus_lock here so the changes which were done in the chip
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -439,6 +439,17 @@ void init_irq_proc(void)
register_irq_proc(irq, desc);
}
+void irq_proc_update_valid(struct irq_desc *desc)
+{
+ u32 set = _IRQ_PROC_VALID;
+
+ if (irq_settings_is_hidden(desc) || !desc->action ||
+ irq_desc_is_chained(desc) || !desc->kstat_irqs)
+ set = 0;
+
+ irq_settings_update_proc_valid(desc, set);
+}
+
#ifdef CONFIG_GENERIC_IRQ_SHOW
int __weak arch_show_interrupts(struct seq_file *p, int prec)
@@ -514,10 +525,7 @@ int show_interrupts(struct seq_file *p,
guard(rcu)();
desc = irq_to_desc(i);
- if (!desc || irq_settings_is_hidden(desc))
- return 0;
-
- if (!desc->action || irq_desc_is_chained(desc) || !desc->kstat_irqs)
+ if (!desc || !irq_settings_proc_valid(desc))
return 0;
seq_printf(p, "%*d:", prec, i);
--- a/kernel/irq/settings.h
+++ b/kernel/irq/settings.h
@@ -18,6 +18,7 @@ enum {
_IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY,
_IRQ_HIDDEN = IRQ_HIDDEN,
_IRQ_NO_DEBUG = IRQ_NO_DEBUG,
+ _IRQ_PROC_VALID = IRQ_RESERVED,
_IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
};
@@ -34,6 +35,7 @@ enum {
#define IRQ_DISABLE_UNLAZY GOT_YOU_MORON
#define IRQ_HIDDEN GOT_YOU_MORON
#define IRQ_NO_DEBUG GOT_YOU_MORON
+#define IRQ_RESERVED GOT_YOU_MORON
#undef IRQF_MODIFY_MASK
#define IRQF_MODIFY_MASK GOT_YOU_MORON
@@ -180,3 +182,14 @@ static inline bool irq_settings_no_debug
{
return desc->status_use_accessors & _IRQ_NO_DEBUG;
}
+
+static inline bool irq_settings_proc_valid(struct irq_desc *desc)
+{
+ return desc->status_use_accessors & _IRQ_PROC_VALID;
+}
+
+static inline void irq_settings_update_proc_valid(struct irq_desc *desc, u32 set)
+{
+ desc->status_use_accessors &= ~_IRQ_PROC_VALID;
+ desc->status_use_accessors |= (set & _IRQ_PROC_VALID);
+}
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 07/14] genirq: Calculate precision only when required
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (5 preceding siblings ...)
2026-03-20 13:21 ` [patch v2 06/14] genirq: Cache the condition for /proc/interrupts exposure Thomas Gleixner
@ 2026-03-20 13:21 ` Thomas Gleixner
2026-03-25 19:47 ` Radu Rendec
2026-03-20 13:22 ` [patch v2 08/14] genirq: Add rcuref count to struct irq_desc Thomas Gleixner
` (7 subsequent siblings)
14 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:21 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
Calculating the precision of the interrupt number column on every initial
show_interrupt() invocation is a pointless exercise as the underlying
maximum number of interrupts rarely changes.
Calculate it only when that number is modified and let show_interrupts()
use the cached value.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
---
kernel/irq/internals.h | 6 ++++++
kernel/irq/irqdesc.c | 10 ++++++----
kernel/irq/proc.c | 28 +++++++++++++++++++---------
3 files changed, 31 insertions(+), 13 deletions(-)
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -134,6 +134,12 @@ static inline void unregister_handler_pr
static inline void irq_proc_update_valid(struct irq_desc *desc) { }
#endif
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_GENERIC_IRQ_SHOW)
+void irq_proc_calc_prec(void);
+#else
+static inline void irq_proc_calc_prec(void) { }
+#endif
+
extern bool irq_can_set_affinity_usr(unsigned int irq);
extern int irq_do_set_affinity(struct irq_data *data,
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -157,13 +157,12 @@ EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
*
* Return: @nr.
*/
-unsigned int irq_set_nr_irqs(unsigned int nr)
+unsigned int __init irq_set_nr_irqs(unsigned int nr)
{
total_nr_irqs = nr;
-
+ irq_proc_calc_prec();
return nr;
}
-EXPORT_SYMBOL_GPL(irq_set_nr_irqs);
static DEFINE_MUTEX(sparse_irq_lock);
static struct maple_tree sparse_irqs = MTREE_INIT_EXT(sparse_irqs,
@@ -544,6 +543,7 @@ static bool irq_expand_nr_irqs(unsigned
if (nr > MAX_SPARSE_IRQS)
return false;
total_nr_irqs = nr;
+ irq_proc_calc_prec();
return true;
}
@@ -572,6 +572,7 @@ int __init early_irq_init(void)
desc = alloc_desc(i, node, 0, NULL, NULL);
irq_insert_desc(i, desc);
}
+ irq_proc_calc_prec();
return arch_early_irq_init();
}
@@ -592,7 +593,7 @@ int __init early_irq_init(void)
init_irq_default_affinity();
- printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS);
+ pr_info("NR_IRQS: %d\n", NR_IRQS);
count = ARRAY_SIZE(irq_desc);
@@ -602,6 +603,7 @@ int __init early_irq_init(void)
goto __free_desc_res;
}
+ irq_proc_calc_prec();
return arch_early_irq_init();
__free_desc_res:
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -457,10 +457,21 @@ int __weak arch_show_interrupts(struct s
return 0;
}
+static int irq_num_prec __read_mostly = 3;
+
#ifndef ACTUAL_NR_IRQS
# define ACTUAL_NR_IRQS total_nr_irqs
#endif
+void irq_proc_calc_prec(void)
+{
+ unsigned int prec, n;
+
+ for (prec = 3, n = 1000; prec < 10 && n <= total_nr_irqs; ++prec)
+ n *= 10;
+ WRITE_ONCE(irq_num_prec, prec);
+}
+
#define ZSTR1 " 0"
#define ZSTR1_LEN (sizeof(ZSTR1) - 1)
#define ZSTR16 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 \
@@ -499,8 +510,7 @@ void irq_proc_emit_counts(struct seq_fil
int show_interrupts(struct seq_file *p, void *v)
{
- const unsigned int nr_irqs = irq_get_nr_irqs();
- static int prec;
+ int prec = READ_ONCE(irq_num_prec);
int i = *(loff_t *) v, j;
struct irqaction *action;
@@ -514,9 +524,6 @@ int show_interrupts(struct seq_file *p,
/* print header and calculate the width of the first column */
if (i == 0) {
- for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
- j *= 10;
-
seq_printf(p, "%*s", prec + 8, "");
for_each_online_cpu(j)
seq_printf(p, "CPU%-8d", j);
@@ -552,13 +559,16 @@ int show_interrupts(struct seq_file *p,
} else {
seq_printf(p, "%8s", "None");
}
+
+ seq_putc(p, ' ');
if (desc->irq_data.domain)
- seq_printf(p, " %*lu", prec, desc->irq_data.hwirq);
+ seq_put_decimal_ull_width(p, "", desc->irq_data.hwirq, prec);
else
seq_printf(p, " %*s", prec, "");
-#ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL
- seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
-#endif
+
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_SHOW_LEVEL))
+ seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
+
if (desc->name)
seq_printf(p, "-%-8s", desc->name);
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 08/14] genirq: Add rcuref count to struct irq_desc
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (6 preceding siblings ...)
2026-03-20 13:21 ` [patch v2 07/14] genirq: Calculate precision only when required Thomas Gleixner
@ 2026-03-20 13:22 ` Thomas Gleixner
2026-03-26 18:43 ` Dmitry Ilvokhin
2026-03-20 13:22 ` [patch v2 09/14] genirq: Expose irq_find_desc_at_or_after() in core code Thomas Gleixner
` (6 subsequent siblings)
14 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:22 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
Prepare for a smarter iterator for /proc/interrupts so that the next
interrupt descriptor can be cached after lookup.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
include/linux/irqdesc.h | 2 ++
kernel/irq/internals.h | 17 ++++++++++++++++-
kernel/irq/irqdesc.c | 21 +++++++++++++--------
3 files changed, 31 insertions(+), 9 deletions(-)
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -70,6 +70,7 @@ struct irq_redirect {
* IRQF_NO_SUSPEND set
* @force_resume_depth: number of irqactions on a irq descriptor with
* IRQF_FORCE_RESUME set
+ * @refcnt: Reference count mainly for /proc/interrupts
* @rcu: rcu head for delayed free
* @kobj: kobject used to represent this struct in sysfs
* @request_mutex: mutex to protect request/free before locking desc->lock
@@ -119,6 +120,7 @@ struct irq_desc {
struct dentry *debugfs_file;
const char *dev_name;
#endif
+ rcuref_t refcnt;
#ifdef CONFIG_SPARSE_IRQ
struct rcu_head rcu;
struct kobject kobj;
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -9,6 +9,7 @@
#include <linux/irqdesc.h>
#include <linux/kernel_stat.h>
#include <linux/pm_runtime.h>
+#include <linux/rcuref.h>
#include <linux/sched/clock.h>
#ifdef CONFIG_SPARSE_IRQ
@@ -101,9 +102,23 @@ extern void unmask_irq(struct irq_desc *
extern void unmask_threaded_irq(struct irq_desc *desc);
#ifdef CONFIG_SPARSE_IRQ
-static inline void irq_mark_irq(unsigned int irq) { }
+static __always_inline void irq_mark_irq(unsigned int irq) { }
+void irq_desc_free_rcu(struct irq_desc *desc);
+
+static __always_inline bool irq_desc_get_ref(struct irq_desc *desc)
+{
+ return rcuref_get(&desc->refcnt);
+}
+
+static __always_inline void irq_desc_put_ref(struct irq_desc *desc)
+{
+ if (rcuref_put(&desc->refcnt))
+ irq_desc_free_rcu(desc);
+}
#else
extern void irq_mark_irq(unsigned int irq);
+static __always_inline bool irq_desc_get_ref(struct irq_desc *desc) { return true; }
+static __always_inline void irq_desc_put_ref(struct irq_desc *desc) { }
#endif
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc);
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -137,6 +137,7 @@ static void desc_set_defaults(unsigned i
desc->tot_count = 0;
desc->name = NULL;
desc->owner = owner;
+ rcuref_init(&desc->refcnt, 1);
desc_smp_init(desc, node, affinity);
}
@@ -465,6 +466,17 @@ static void delayed_free_desc(struct rcu
kobject_put(&desc->kobj);
}
+void irq_desc_free_rcu(struct irq_desc *desc)
+{
+ /*
+ * We free the descriptor, masks and stat fields via RCU. That
+ * allows demultiplex interrupts to do rcu based management of
+ * the child interrupts.
+ * This also allows us to use rcu in kstat_irqs_usr().
+ */
+ call_rcu(&desc->rcu, delayed_free_desc);
+}
+
static void free_desc(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -483,14 +495,7 @@ static void free_desc(unsigned int irq)
*/
irq_sysfs_del(desc);
delete_irq_desc(irq);
-
- /*
- * We free the descriptor, masks and stat fields via RCU. That
- * allows demultiplex interrupts to do rcu based management of
- * the child interrupts.
- * This also allows us to use rcu in kstat_irqs_usr().
- */
- call_rcu(&desc->rcu, delayed_free_desc);
+ irq_desc_put_ref(desc);
}
static int alloc_descs(unsigned int start, unsigned int cnt, int node,
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 09/14] genirq: Expose irq_find_desc_at_or_after() in core code
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (7 preceding siblings ...)
2026-03-20 13:22 ` [patch v2 08/14] genirq: Add rcuref count to struct irq_desc Thomas Gleixner
@ 2026-03-20 13:22 ` Thomas Gleixner
2026-03-26 19:13 ` Dmitry Ilvokhin
2026-03-20 13:22 ` [patch v2 10/14] genirq/proc: Speed up /proc/interrupts iteration Thomas Gleixner
` (5 subsequent siblings)
14 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:22 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
--- in preparation for a smarter iterator for /proc/interrupts.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
kernel/irq/internals.h | 2 ++
kernel/irq/irqdesc.c | 12 +++++-------
2 files changed, 7 insertions(+), 7 deletions(-)
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -155,6 +155,8 @@ void irq_proc_calc_prec(void);
static inline void irq_proc_calc_prec(void) { }
#endif
+struct irq_desc *irq_find_desc_at_or_after(unsigned int offset);
+
extern bool irq_can_set_affinity_usr(unsigned int irq);
extern int irq_do_set_affinity(struct irq_data *data,
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -181,15 +181,11 @@ static int irq_find_free_area(unsigned i
return mas.index;
}
-static unsigned int irq_find_at_or_after(unsigned int offset)
+struct irq_desc *irq_find_desc_at_or_after(unsigned int offset)
{
unsigned long index = offset;
- struct irq_desc *desc;
-
- guard(rcu)();
- desc = mt_find(&sparse_irqs, &index, total_nr_irqs);
- return desc ? irq_desc_get_irq(desc) : total_nr_irqs;
+ return mt_find(&sparse_irqs, &index, total_nr_irqs);
}
static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
@@ -934,7 +930,9 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs);
*/
unsigned int irq_get_next_irq(unsigned int offset)
{
- return irq_find_at_or_after(offset);
+ struct irq_desc *desc = irq_find_desc_at_or_after(offset);
+
+ return desc ? irq_desc_get_irq(desc) : total_nr_irqs;
}
struct irq_desc *__irq_get_desc_lock(unsigned int irq, unsigned long *flags, bool bus,
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 10/14] genirq/proc: Speed up /proc/interrupts iteration
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (8 preceding siblings ...)
2026-03-20 13:22 ` [patch v2 09/14] genirq: Expose irq_find_desc_at_or_after() in core code Thomas Gleixner
@ 2026-03-20 13:22 ` Thomas Gleixner
2026-03-20 13:22 ` [patch v2 11/14] [RFC] genirq: Cache target CPU for single CPU affinities Thomas Gleixner
` (4 subsequent siblings)
14 siblings, 0 replies; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:22 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
Reading /proc/interrupts iterates over the interrupt number space one by
one and looks up the descriptors one by one. That's just a waste of time.
When CONFIG_GENERIC_IRQ_SHOW is enabled this can utilize the maple tree and
cache the descriptor pointer efficiently for the sequence file operations.
Implement a CONFIG_GENERIC_IRQ_SHOW specific version in the core code and
leave the fs/proc/ variant for the legacy architectures which ignore generic
code.
This reduces the time wasted for looking up the next record significantly.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
fs/proc/Makefile | 4 +-
kernel/irq/proc.c | 99 +++++++++++++++++++++++++++++++++++++++++++-----------
2 files changed, 83 insertions(+), 20 deletions(-)
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -16,7 +16,9 @@ proc-y += cmdline.o
proc-y += consoles.o
proc-y += cpuinfo.o
proc-y += devices.o
-proc-y += interrupts.o
+ifneq ($(CONFIG_GENERIC_IRQ_SHOW),y)
+proc-y += interrupts.o
+endif
proc-y += loadavg.o
proc-y += meminfo.o
proc-y += stat.o
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -452,6 +452,8 @@ void irq_proc_update_valid(struct irq_de
#ifdef CONFIG_GENERIC_IRQ_SHOW
+#define ARCH_PROC_IRQDESC ((void *)0x00001111)
+
int __weak arch_show_interrupts(struct seq_file *p, int prec)
{
return 0;
@@ -508,34 +510,29 @@ void irq_proc_emit_counts(struct seq_fil
irq_proc_emit_zero_counts(p, zeros);
}
-int show_interrupts(struct seq_file *p, void *v)
+static int irq_seq_show(struct seq_file *p, void *v)
{
- int prec = READ_ONCE(irq_num_prec);
-
- int i = *(loff_t *) v, j;
+ int prec = (int)(unsigned long)p->private;
+ struct irq_desc *desc = v;
struct irqaction *action;
- struct irq_desc *desc;
-
- if (i > ACTUAL_NR_IRQS)
- return 0;
- if (i == ACTUAL_NR_IRQS)
+ if (desc == ARCH_PROC_IRQDESC)
return arch_show_interrupts(p, prec);
- /* print header and calculate the width of the first column */
- if (i == 0) {
+ /* print header for the first interrupt indicated by !p->private */
+ if (!prec) {
+ unsigned int cpu;
+
+ prec = READ_ONCE(irq_num_prec);
seq_printf(p, "%*s", prec + 8, "");
- for_each_online_cpu(j)
- seq_printf(p, "CPU%-8d", j);
+ for_each_online_cpu(cpu)
+ seq_printf(p, "CPU%-8d", cpu);
seq_putc(p, '\n');
+ p->private = (void *)(unsigned long)prec;
}
- guard(rcu)();
- desc = irq_to_desc(i);
- if (!desc || !irq_settings_proc_valid(desc))
- return 0;
-
- seq_printf(p, "%*d:", prec, i);
+ seq_put_decimal_ull_width(p, "", irq_desc_get_irq(desc), prec);
+ seq_putc(p, ':');
/*
* Always output per CPU interrupts. Output device interrupts only when
@@ -582,4 +579,68 @@ int show_interrupts(struct seq_file *p,
seq_putc(p, '\n');
return 0;
}
+
+static void *irq_seq_next_desc(loff_t *pos)
+{
+ struct irq_desc *desc;
+
+ if (*pos > total_nr_irqs)
+ return NULL;
+
+ guard(rcu)();
+ for (;;) {
+ desc = irq_find_desc_at_or_after((unsigned int) *pos);
+ if (desc) {
+ *pos = irq_desc_get_irq(desc);
+ /*
+ * If valid for output try to acquire a reference count
+ * on the descriptor so that it can't be freed after
+ * dropping RCU read lock on return.
+ */
+ if (irq_settings_proc_valid(desc) && irq_desc_get_ref(desc))
+ return desc;
+ (*pos)++;
+ } else {
+ *pos = total_nr_irqs;
+ return ARCH_PROC_IRQDESC;
+ }
+ }
+}
+
+static void *irq_seq_start(struct seq_file *f, loff_t *pos)
+{
+ if (!*pos)
+ f->private = NULL;
+ return irq_seq_next_desc(pos);
+}
+
+static void *irq_seq_next(struct seq_file *f, void *v, loff_t *pos)
+{
+ if (v && v != ARCH_PROC_IRQDESC)
+ irq_desc_put_ref(v);
+
+ (*pos)++;
+ return irq_seq_next_desc(pos);
+}
+
+static void irq_seq_stop(struct seq_file *f, void *v)
+{
+ if (v && v != ARCH_PROC_IRQDESC)
+ irq_desc_put_ref(v);
+}
+
+static const struct seq_operations irq_seq_ops = {
+ .start = irq_seq_start,
+ .next = irq_seq_next,
+ .stop = irq_seq_stop,
+ .show = irq_seq_show,
+};
+
+static int __init irq_proc_init(void)
+{
+ proc_create_seq("interrupts", 0, NULL, &irq_seq_ops);
+ return 0;
+}
+fs_initcall(irq_proc_init);
+
#endif
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 11/14] [RFC] genirq: Cache target CPU for single CPU affinities
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (9 preceding siblings ...)
2026-03-20 13:22 ` [patch v2 10/14] genirq/proc: Speed up /proc/interrupts iteration Thomas Gleixner
@ 2026-03-20 13:22 ` Thomas Gleixner
2026-03-20 13:22 ` [patch v2 12/14] [RFC] genirq/proc: Provide binary statistic interface Thomas Gleixner
` (3 subsequent siblings)
14 siblings, 0 replies; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:22 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
Some places can be optimized by caching the target CPU for single CPU
affinities. That avoids finding the single CPU in the effective affinity
mask. Provide infrastructure for that.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
include/linux/irq.h | 17 +++++++++++++++++
kernel/irq/manage.c | 14 ++++++++++----
2 files changed, 27 insertions(+), 4 deletions(-)
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -140,6 +140,8 @@ struct irq_domain;
* @effective_affinity: The effective IRQ affinity on SMP as some irq
* chips do not allow multi CPU destinations.
* A subset of @affinity.
+ * @target_cpu: The target CPU when @effective_affinity contains
+ * only a single CPU, IRQ_TARGET_MULTI_CPU otherwise
* @msi_desc: MSI descriptor
* @ipi_offset: Offset of first IPI target cpu in @affinity. Optional.
*/
@@ -155,6 +157,7 @@ struct irq_common_data {
#endif
#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
cpumask_var_t effective_affinity;
+ unsigned int target_cpu;
#endif
#ifdef CONFIG_GENERIC_IRQ_IPI
unsigned int ipi_offset;
@@ -903,6 +906,8 @@ static inline const struct cpumask *irq_
return d ? irq_data_get_affinity_mask(d) : NULL;
}
+#define IRQ_TARGET_MULTI_CPU UINT_MAX
+
#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK
static inline
const struct cpumask *irq_data_get_effective_affinity_mask(struct irq_data *d)
@@ -914,6 +919,14 @@ static inline void irq_data_update_effec
{
cpumask_copy(d->common->effective_affinity, m);
}
+static inline unsigned int irq_data_get_single_target(struct irq_data *d)
+{
+ return d->common->target_cpu;
+}
+static inline void irq_data_set_single_target(struct irq_data *d, unsigned int cpu)
+{
+ d->common->target_cpu = cpu;
+}
#else
static inline void irq_data_update_effective_affinity(struct irq_data *d,
const struct cpumask *m)
@@ -924,6 +937,10 @@ const struct cpumask *irq_data_get_effec
{
return irq_data_get_affinity_mask(d);
}
+static inline unsigned int irq_data_get_single_target(struct irq_data *d)
+{
+ return IRQ_TARGET_MULTI_CPU;
+}
#endif
static inline
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -217,11 +217,17 @@ static void irq_validate_effective_affin
{
const struct cpumask *m = irq_data_get_effective_affinity_mask(data);
struct irq_chip *chip = irq_data_get_irq_chip(data);
+ unsigned int target = IRQ_TARGET_MULTI_CPU;
- if (!cpumask_empty(m))
- return;
- pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n",
- chip->name, data->irq);
+ switch (cpumask_weight(m)) {
+ case 0:
+ pr_warn_once("irq_chip %s did not update eff. affinity mask of irq %u\n",
+ chip->name, data->irq);
+ break;
+ case 1:
+ target = cpumask_first(m);
+ }
+ irq_data_set_single_target(data, target);
}
#else
static inline void irq_validate_effective_affinity(struct irq_data *data) { }
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 12/14] [RFC] genirq/proc: Provide binary statistic interface
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (10 preceding siblings ...)
2026-03-20 13:22 ` [patch v2 11/14] [RFC] genirq: Cache target CPU for single CPU affinities Thomas Gleixner
@ 2026-03-20 13:22 ` Thomas Gleixner
2026-03-20 13:22 ` [patch v2 13/14] [RFC] genirq/proc: Provide architecture specific binary statistics Thomas Gleixner
` (2 subsequent siblings)
14 siblings, 0 replies; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:22 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
/proc/interrupts is expensive to evaluate for monitoring because:
- it is text based and contains a lot of information which is not
relevant for interrupt frequency analysis. Due to the extra information
like chip name, hardware interrupt number, interrupt action names, it
has to take the interrupt descriptor lock to output those items into
the seq_file buffer. That obviously interferes with high frequency
interrupt workloads.
- it contains both device interrupts, per CPU and architecture specific
interrupt counters without being able to look at them separately. The
file is seekable by some definition of seekable as the position can
change when interrupts are requested or freed, so the data has to be
read completely to get a coherent picture.
- it emits records for requested interrupts even if their interrupt count
is zero.
- it always prints the per CPU counters even if all but one of them are
zero.
- converting numbers to text and then parsing the text back to numbers in
user space is a pretty wasteful exercise
Provide a new interface which addresses the above pain points:
1) The interface is binary and emits variable length records per
interrupt. Each record starts with a header containing the interrupt
number and the number of data entries following the header. The data
entries consist of a CPU number and count pair.
2) Interrupts with a total count of zero are skipped and produce no
output at all.
3) Interrupts which have a single CPU affinity either due to a restricted
affinity mask or due to the underlying interrupt chip restricting a
mask to a single CPU target emit only one data entry.
That means they are not emitting the stale counts on previous target
CPUs but they are not really interesting for interrupt frequency
analysis as they are not changing and therefore pointless for
accounting.
4) The interface separates device interrupts, per CPU interrupts and
architecture specific interrupts.
Per CPU and architecture specific interrupts can only be monitored,
while device interrupts can also be steered by changing the affinity
unless they are affinity managed by the kernel.
Per CPU interrupts are only available on architectures, e.g. ARM64,
which use the regular interrupt descriptor mechanism for per CPU
interrupt handling.
Architectures which have their own mechanics, e.g. x86, do not enable
and provide the per CPU interface as those interrupts are covered by
the architecture specific accounting.
5) The readout is fully lockless so it does not interfere with concurrent
interrupt handling.
6) Seek is restricted to seek(fd, 0, SEEK_SET) as that's the only
operation which makes sense due to the variable record length and the
dynamics of interrupt request/free operations which influence the
position of the records in the output. For all other seek()
invocations return the current file position, which makes e.g. python
happy as an error code causes the file open checks to mark the
resulting file object non-seekable.
Implement support for /proc/irq/device_stats and /proc/irq/percpu_stats.
The support for architecture specific interrupt statistics is added in a
separate step.
Reading /proc/irq/device_stats on a 256 CPU x86 machine with 83 requested
interrupts produces 13 records due to skipping zero count interrupts. It
results in 13 * 16 = 208 bytes of data as all device interrupts on x86 are
single CPU targeted. That readout takes ~8us time in the kernel, while the
full /proc/interrupts readout takes about 360us.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
include/uapi/linux/irqstats.h | 27 +++
kernel/irq/Kconfig | 3
kernel/irq/proc.c | 314 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 344 insertions(+)
--- /dev/null
+++ b/include/uapi/linux/irqstats.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
+#ifndef LINUX_UAPI_IRQSTATS_H
+#define LINUX_UAPI_IRQSTATS_H
+
+/**
+ * struct irq_proc_stat_cpu - Data record for /proc/irq/stats
+ * @cpu: The CPU associated with @cnt
+ * @cnt: The count associated with @cpu
+ */
+struct irq_proc_stat_cpu {
+ unsigned int cpu;
+ unsigned int cnt;
+};
+
+/**
+ * struct irq_proc_stat_data - Data header for /proc/irq/stats
+ * @irqnr: The interrupt number
+ * @entries: The number of records (max. nr_cpu_ids)
+ * @pcpu: Runtime sized array of per CPU stat records
+ */
+struct irq_proc_stat_data {
+ unsigned int irqnr;
+ unsigned int entries;
+ struct irq_proc_stat_cpu pcpu[];
+};
+
+#endif
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -18,6 +18,9 @@ config GENERIC_IRQ_SHOW
config GENERIC_IRQ_SHOW_LEVEL
bool
+config GENERIC_IRQ_STATS_PERCPU
+ bool
+
# Supports effective affinity mask
config GENERIC_IRQ_EFFECTIVE_AFF_MASK
depends on SMP
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -13,6 +13,8 @@
#include <linux/kernel_stat.h>
#include <linux/mutex.h>
#include <linux/string.h>
+#include <linux/uio.h>
+#include <uapi/linux/irqstats.h>
#include "internals.h"
@@ -636,9 +638,321 @@ static const struct seq_operations irq_s
.show = irq_seq_show,
};
+/*
+ * /proc/irq/stats related code
+ *
+ * /proc/irq/stats provides variable record sized statistics for device
+ * interrupts.
+ */
+struct irq_proc_stat {
+ unsigned int irqnr;
+ bool percpu;
+ bool first;
+ size_t from;
+ size_t count;
+ loff_t read_pos;
+ struct irq_desc *desc;
+ struct irq_proc_stat_data *data;
+};
+
+static inline bool irq_stat_valid_irq(struct irq_proc_stat *s)
+{
+ struct irq_desc *desc = s->desc;
+
+ /* Check for general validity */
+ if (!irq_settings_proc_valid(desc))
+ return false;
+
+ if (!s->percpu) {
+ /*
+ * Device interrupts update desc::tot_count. Per CPU
+ * interrupts are not touching those fields due to the
+ * obvious concurrency issues. For device interrupts it's
+ * therefore sufficient to evaluate desc::tot_count.
+ */
+ if (!data_race(desc->tot_count))
+ return false;
+ } else {
+ /*
+ * Per CPU interrupts are marked accordingly in the
+ * settings.
+ */
+ if (!irq_settings_is_per_cpu(desc) && !irq_settings_is_per_cpu_devid(desc))
+ return false;
+ }
+
+ /* Try to get a reference to prevent freeing before it's evaluated */
+ return irq_desc_get_ref(desc);
+}
+
+static inline bool irq_stat_find_irq(struct irq_proc_stat *s)
+{
+ /* Loop until a valid interrupt is found */
+ guard(rcu)();
+ for (;; s->irqnr++) {
+ s->desc = irq_find_desc_at_or_after(s->irqnr);
+ /* NULL means there is no interrupt anymore in the maple tree */
+ if (!s->desc) {
+ s->irqnr = total_nr_irqs;
+ return false;
+ }
+
+ /* Save the interrupt number for the next search */
+ s->irqnr = irq_desc_get_irq(s->desc);
+
+ if (irq_stat_valid_irq(s))
+ return true;
+ }
+}
+
+static inline void irq_stat_next_irq(struct irq_proc_stat *s)
+{
+ s->irqnr++;
+ irq_stat_find_irq(s);
+}
+
+static void irq_dev_stat_update_one(struct irq_proc_stat *s)
+{
+ struct irq_proc_stat_data *d = s->data;
+ struct irq_desc *desc = s->desc;
+ struct irq_data *irqd;
+ unsigned int cpu;
+
+ /*
+ * Optimize for single CPU target affinities. Otherwise walk the
+ * effective affinity mask, which falls back to the real affinity
+ * mask if the architecture does not support effective affinity
+ * masks. Bad luck...
+ */
+ irqd = irq_desc_get_irq_data(desc);
+ cpu = irq_data_get_single_target(irqd);
+ if (cpu < nr_cpu_ids) {
+ struct irq_proc_stat_cpu pcpu = {
+ .cpu = cpu,
+ .cnt = data_race(per_cpu(desc->kstat_irqs->cnt, cpu)),
+ };
+
+ if (pcpu.cnt)
+ d->pcpu[d->entries++] = pcpu;
+ } else {
+ const struct cpumask *m = irq_data_get_effective_affinity_mask(irqd);
+
+ for_each_cpu(cpu, m) {
+ struct irq_proc_stat_cpu pcpu = {
+ .cpu = cpu,
+ .cnt = data_race(per_cpu(desc->kstat_irqs->cnt, cpu)),
+ };
+
+ if (pcpu.cnt)
+ d->pcpu[d->entries++] = pcpu;
+ }
+ }
+}
+
+static void irq_percpu_stat_update_one(struct irq_proc_stat *s)
+{
+ struct irq_proc_stat_data *d = s->data;
+ struct irq_desc *desc = s->desc;
+ unsigned int cpu;
+
+ for_each_online_cpu(cpu) {
+ struct irq_proc_stat_cpu pcpu = {
+ .cpu = cpu,
+ .cnt = data_race(per_cpu(desc->kstat_irqs->cnt, cpu)),
+ };
+
+ if (pcpu.cnt)
+ d->pcpu[d->entries++] = pcpu;
+ }
+}
+
+static bool irq_stat_update_one(struct irq_proc_stat *s)
+{
+ struct irq_proc_stat_data *d = s->data;
+
+ if (IS_ENABLED(CONFIG_GENERIC_IRQ_STATS_PERCPU) && s->percpu)
+ irq_percpu_stat_update_one(s);
+ else
+ irq_dev_stat_update_one(s);
+
+ /* Only output data if there is an actual count */
+ if (d->entries) {
+ d->irqnr = s->irqnr;
+ s->count = sizeof(*d) + d->entries * sizeof(*d->pcpu);
+ }
+
+ /* Drop the reference count which got acquired in irq_stat_find_irq() */
+ irq_desc_put_ref(s->desc);
+ s->desc = NULL;
+ return !!s->count;
+}
+
+static __always_inline bool irq_stat_next_data(struct irq_proc_stat *s)
+{
+ /*
+ * On the first read or after a lseek(fd, 0, SEEK_SET) find the
+ * first interrupt. Otherwise find the next one.
+ */
+ if (unlikely(s->first)) {
+ s->irqnr = 0;
+ s->first = false;
+ irq_stat_find_irq(s);
+ } else {
+ irq_stat_next_irq(s);
+ }
+
+ /* Repeat until an interrupt with non-zero counts is found */
+ for (; s->desc; irq_stat_next_irq(s)) {
+ if (irq_stat_update_one(s))
+ return true;
+ }
+ return false;
+}
+
+static size_t irq_stat_copy_to_iter(struct irq_proc_stat *s, struct iov_iter *iter)
+{
+ size_t n = copy_to_iter(((char *)s->data) + s->from, s->count, iter);
+
+ s->count -= n;
+ s->from += n;
+ return n;
+}
+
+/* Force inline as otherwise next() becomes a indirect call */
+static __always_inline ssize_t __irq_stats_read(struct kiocb *iocb, struct iov_iter *iter,
+ bool (*next)(struct irq_proc_stat *))
+{
+ struct irq_proc_stat *s = iocb->ki_filp->private_data;
+ size_t copied = 0;
+
+ /* Real seek is not supported. See irq_stat_lseek() */
+ if (WARN_ON_ONCE(iocb->ki_pos != s->read_pos))
+ goto done;
+
+ if (s->count)
+ copied += irq_stat_copy_to_iter(s, iter);
+
+ for (; !s->count;) {
+ s->count = s->from = 0;
+ s->data->entries = 0;
+
+ if (!next(s))
+ goto done;
+ copied += irq_stat_copy_to_iter(s, iter);
+ }
+
+ if (!copied)
+ return -EFAULT;
+done:
+ iocb->ki_pos += copied;
+ s->read_pos += copied;
+ return copied;
+}
+
+static ssize_t irq_stats_read(struct kiocb *iocb, struct iov_iter *iter)
+{
+ return __irq_stats_read(iocb, iter, irq_stat_next_data);
+}
+
+static loff_t irq_stats_llseek(struct file *filp, loff_t offset, int whence)
+{
+ struct irq_proc_stat *s = filp->private_data;
+ loff_t ret;
+
+ /*
+ * As this is a variable record interface and the actual use case is to
+ * get a full snapshot of the active interrupts, there is no point in
+ * trying to be fully seekable. Just support rewind to the beginning of
+ * the data set. For all other operations return the current position
+ * which makes e.g. python happy.
+ */
+ if (whence != SEEK_SET || offset)
+ return noop_llseek(filp, offset, whence);
+
+ ret = default_llseek(filp, 0, SEEK_SET);
+ if (ret < 0)
+ return ret;
+
+ /* Reset the position, drop any leftovers and indicate to start over */
+ s->read_pos = 0;
+ s->count = 0;
+ s->first = true;
+ return 0;
+}
+
+static int __irq_stats_open(struct inode *inode, struct file *filp, bool percpu)
+{
+ struct irq_proc_stat *s = kzalloc_obj(*s);
+
+ if (!s)
+ return -ENOMEM;
+
+ s->data = kzalloc_flex(*s->data, pcpu, num_possible_cpus());
+ if (!s->data) {
+ kfree(s);
+ return -ENOMEM;
+ }
+
+ s->first = true;
+ s->percpu = percpu;
+ filp->private_data = s;
+ return 0;
+}
+
+static int irq_stats_open(struct inode *inode, struct file *filp)
+{
+ return __irq_stats_open(inode, filp, false);
+}
+
+static int irq_stats_release(struct inode *inode, struct file *filp)
+{
+ struct irq_proc_stat *s = filp->private_data;
+
+ if (s) {
+ kfree(s->data);
+ kfree(s);
+ }
+ return 0;
+}
+
+static const struct proc_ops irq_dev_stat_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
+ .proc_open = irq_stats_open,
+ .proc_release = irq_stats_release,
+ .proc_read_iter = irq_stats_read,
+ .proc_lseek = irq_stats_llseek,
+};
+
+#ifdef CONFIG_GENERIC_IRQ_STATS_PERCPU
+static int irq_pcp_stats_open(struct inode *inode, struct file *filp)
+{
+ return __irq_stats_open(inode, filp, true);
+}
+
+static const struct proc_ops irq_pcp_stat_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
+ .proc_open = irq_pcp_stats_open,
+ .proc_release = irq_stats_release,
+ .proc_read_iter = irq_stats_read,
+ .proc_lseek = irq_stats_llseek,
+};
+
+static __init void irq_pcp_stats_init(void)
+{
+ proc_create("percpu_stats", 0, root_irq_dir, &irq_pcp_stat_ops);
+}
+#else /* CONFIG_GENERIC_IRQ_STATS_PERCPU */
+static inline void irq_pcp_stats_init(void) { }
+#endif /* !CONFIG_GENERIC_IRQ_STATS_PERCPU */
+
static int __init irq_proc_init(void)
{
proc_create_seq("interrupts", 0, NULL, &irq_seq_ops);
+ if (!root_irq_dir)
+ return 0;
+
+ proc_create("device_stats", 0, root_irq_dir, &irq_dev_stat_ops);
+ irq_pcp_stats_init();
return 0;
}
fs_initcall(irq_proc_init);
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 13/14] [RFC] genirq/proc: Provide architecture specific binary statistics
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (11 preceding siblings ...)
2026-03-20 13:22 ` [patch v2 12/14] [RFC] genirq/proc: Provide binary statistic interface Thomas Gleixner
@ 2026-03-20 13:22 ` Thomas Gleixner
2026-03-20 13:22 ` [patch v2 14/14] [RFC] x86/irq: Hook up architecture specific stats Thomas Gleixner
2026-03-20 16:45 ` [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Michael Kelley
14 siblings, 0 replies; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:22 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
Provide a binary statistics interface similar to the per device and per CPU
interfaces to access the architecture specific interrupt statistics.
The architecture has to select it in Kconfig and provide an accessor to the
per CPU interrupt information and the number of architecture specific
entries.
The entries are ordered by a numerical index starting from 0, which
corresponds to the ordering of those interrupts in /proc/interrupts. The
output format is the same as for the per device and per CPU interfaces and
only contains entries which have an interrupt count > 0.
Reading the architecture specific counters of a 256 CPU x86 system takes
36us kernel time for 6 interrupts with non-zero counts and produces about
10k of data.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
kernel/irq/Kconfig | 3 ++
kernel/irq/proc.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 59 insertions(+)
--- a/kernel/irq/Kconfig
+++ b/kernel/irq/Kconfig
@@ -18,6 +18,9 @@ config GENERIC_IRQ_SHOW
config GENERIC_IRQ_SHOW_LEVEL
bool
+config GENERIC_IRQ_STATS_ARCH
+ bool
+
config GENERIC_IRQ_STATS_PERCPU
bool
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -945,6 +945,61 @@ static __init void irq_pcp_stats_init(vo
static inline void irq_pcp_stats_init(void) { }
#endif /* !CONFIG_GENERIC_IRQ_STATS_PERCPU */
+#ifdef CONFIG_GENERIC_IRQ_STATS_ARCH
+static inline void arch_stat_update_one(struct irq_proc_stat *s)
+{
+ struct irq_proc_stat_data *d = s->data;
+ unsigned int cpu, idx = s->irqnr;
+
+ for_each_online_cpu(cpu) {
+ struct irq_proc_stat_cpu pcpu = {
+ .cpu = cpu,
+ .cnt = arch_get_irq_stat(cpu, idx),
+ };
+
+ if (pcpu.cnt)
+ d->pcpu[d->entries++] = pcpu;
+ }
+
+ if (d->entries) {
+ d->irqnr = idx;
+ s->count = sizeof(*d) + d->entries * sizeof(*d->pcpu);
+ }
+}
+
+static __always_inline bool arch_stat_next_data(struct irq_proc_stat *s)
+{
+ if (unlikely(s->first)) {
+ s->irqnr = 0;
+ s->first = false;
+ }
+
+ for(; !s->count && s->irqnr < ARCH_IRQ_STATS_NUM_IRQS; s->irqnr++)
+ arch_stat_update_one(s);
+ return !!s->count;
+}
+
+static ssize_t irq_arch_stats_read(struct kiocb *iocb, struct iov_iter *iter)
+{
+ return __irq_stats_read(iocb, iter, arch_stat_next_data);
+}
+
+static const struct proc_ops irq_arch_stat_ops = {
+ .proc_flags = PROC_ENTRY_PERMANENT,
+ .proc_open = irq_stats_open,
+ .proc_release = irq_stats_release,
+ .proc_read_iter = irq_arch_stats_read,
+ .proc_lseek = irq_stats_llseek,
+};
+
+static __init void irq_arch_stats_init(void)
+{
+ proc_create("arch_stats", 0, root_irq_dir, &irq_arch_stat_ops);
+}
+#else /* CONFIG_GENERIC_IRQ_STATS_ARCH */
+static inline void irq_arch_stats_init(void) { }
+#endif /* !CONFIG_GENERIC_IRQ_STATS_ARCH */
+
static int __init irq_proc_init(void)
{
proc_create_seq("interrupts", 0, NULL, &irq_seq_ops);
@@ -953,6 +1008,7 @@ static int __init irq_proc_init(void)
proc_create("device_stats", 0, root_irq_dir, &irq_dev_stat_ops);
irq_pcp_stats_init();
+ irq_arch_stats_init();
return 0;
}
fs_initcall(irq_proc_init);
^ permalink raw reply [flat|nested] 43+ messages in thread
* [patch v2 14/14] [RFC] x86/irq: Hook up architecture specific stats
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (12 preceding siblings ...)
2026-03-20 13:22 ` [patch v2 13/14] [RFC] genirq/proc: Provide architecture specific binary statistics Thomas Gleixner
@ 2026-03-20 13:22 ` Thomas Gleixner
2026-03-20 16:45 ` [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Michael Kelley
14 siblings, 0 replies; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-20 13:22 UTC (permalink / raw)
To: LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Radu Rendec
Enable the binary statistics interface for architecture specific
interrupts.
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
---
arch/x86/Kconfig | 1 +
arch/x86/include/asm/hardirq.h | 7 +++++++
2 files changed, 8 insertions(+)
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -177,6 +177,7 @@ config X86
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_RESERVATION_MODE
select GENERIC_IRQ_SHOW
+ select GENERIC_IRQ_STATS_ARCH
select GENERIC_PENDING_IRQ if SMP
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -65,7 +65,14 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpust
#ifdef CONFIG_X86_POSTED_MSI
DECLARE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc);
#endif
+
#define __ARCH_IRQ_STAT
+#define ARCH_IRQ_STATS_NUM_IRQS IRQ_COUNT_MAX
+
+static inline unsigned int arch_get_irq_stat(unsigned int cpu, unsigned int idx)
+{
+ return data_race(per_cpu_ptr(&irq_stat, cpu)->counts[idx]);
+}
#define inc_irq_stat(index) this_cpu_inc(irq_stat.counts[IRQ_COUNT_##index])
^ permalink raw reply [flat|nested] 43+ messages in thread
* RE: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-20 13:21 ` [patch v2 04/14] x86/irq: Make irqstats array based Thomas Gleixner
@ 2026-03-20 16:39 ` Michael Kelley
2026-03-21 16:38 ` Thomas Gleixner
2026-03-23 19:24 ` Radu Rendec
1 sibling, 1 reply; 43+ messages in thread
From: Michael Kelley @ 2026-03-20 16:39 UTC (permalink / raw)
To: Thomas Gleixner, LKML
Cc: x86@kernel.org, Dmitry Ilvokhin, Neil Horman, Radu Rendec
From: Thomas Gleixner <tglx@kernel.org> Sent: Friday, March 20, 2026 6:22 AM
>
> Having the x86 specific interrupt statistics as a data structure with
> individual members instead of an array is just stupid as it requires
> endless copy and paste in arch_show_interrupts() and arch_irq_stat_cpu(),
> where the latter does not even take the latest interrupt additions into
> account. The resulting #ifdef orgy is just disgusting.
>
> Convert it to an array of counters, which does not make a difference in the
> actual interrupt hotpath increment as the array index is constant and
> therefore not any different than the member based access.
>
> But in arch_show_interrupts() and arch_irq_stat_cpu() this just turns into
> a loop, which reduces the text size by ~2k (~12%):
>
> text data bss dec hex filename
> 19643 15250 904 35797 8bd5 ../build/arch/x86/kernel/irq.o
> 17355 15250 904 33509 82e5 ../build/arch/x86/kernel/irq.o
>
> Adding a new vector or software counter only requires to update the table
> and everything just works. Using the core provided emit function which
> speeds up 0 outputs makes it significantly faster.
>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> ---
> V2: Simplified and extended vector skip mechanism
> Fixup the typos - Michael, Dmitry
> Added the lost precision back for ERR/MIS - Dmitry
[snip]
> #if IS_ENABLED(CONFIG_HYPERV)
> - if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
> - seq_printf(p, "%*s:", prec, "HRE");
> - for_each_online_cpu(j)
> - put_decimal(p,
> - irq_stats(j)->irq_hv_reenlightenment_count);
> - seq_puts(p, " Hyper-V reenlightenment interrupts\n");
> - }
> - if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) {
> - seq_printf(p, "%*s:", prec, "HVS");
> - for_each_online_cpu(j)
> - put_decimal(p, irq_stats(j)->hyperv_stimer0_count);
> - seq_puts(p, " Hyper-V stimer0 interrupts\n");
> - }
> -#endif
> - seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
> -#if defined(CONFIG_X86_IO_APIC)
> - seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
> + ITS(HYPERV_REENLIGHTENMENT, "HRE", " Hyper-V reenlightment interrupts\n"),
There's still a typo in the description string:
s/reenlightment/reenlightenment/
Michael
> + ITS(HYPERV_STIMER0, "HVS", " Hyper-V stimer0 interrupts\n"),
> #endif
^ permalink raw reply [flat|nested] 43+ messages in thread
* RE: [patch v2 00/14] Improve /proc/interrupts further and add a binary interface
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
` (13 preceding siblings ...)
2026-03-20 13:22 ` [patch v2 14/14] [RFC] x86/irq: Hook up architecture specific stats Thomas Gleixner
@ 2026-03-20 16:45 ` Michael Kelley
14 siblings, 0 replies; 43+ messages in thread
From: Michael Kelley @ 2026-03-20 16:45 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86@kernel.org, Dmitry Ilvokhin
From: Thomas Gleixner <tglx@kernel.org> Sent: Friday, March 20, 2026 6:21 AM
>
> This is a follow up to v1 which can be found here:
>
> https://lore.kernel.org/20260303150539.513068586@kernel.org
>
> The v1 cover letter contains a full analysis, explanation and numbers.
>
> TLDR:
>
> - The performance of reading of /proc/interrupts has been improved
> piecewise over the years, but most of the low hanging fruit has been
> left on the table.
>
> - For a long time a binary readout interface was considered to be the
> better option, but it never materialized.
>
> The series fixes the real big performance issues and provides a design
> study for a binary interface.
>
Tested in Hyper-V guests on x86/x64 and arm64. Did basic smoke tests
of taking a CPU offline, and removing a PCI device along with its IRQs,
then adding them back again. No issues seen except for a typo in the
Hyper-V reenlightenment IRQ text description as noted in my reply to
Patch 4 of the series. I did not do anything with the new binary
interface.
For the series (excluding the binary interface),
Tested-by: Michael Kelley <mhklinux@outlook.com>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 01/14] x86/irq: Optimize interrupts decimals printing
2026-03-20 13:21 ` [patch v2 01/14] x86/irq: Optimize interrupts decimals printing Thomas Gleixner
@ 2026-03-21 16:10 ` Radu Rendec
0 siblings, 0 replies; 43+ messages in thread
From: Radu Rendec @ 2026-03-21 16:10 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
> From: Dmitry Ilvokhin <d@ilvokhin.com>
>
> Monitoring tools periodically scan /proc/interrupts to export metrics as a
> timeseries for future analysis and investigation.
>
> In large fleets, /proc/interrupts is polled (often every few seconds) on
> every machine. The cumulative overhead adds up quickly across thousands
> of nodes, so reducing the cost of generating these stats does have a
> measurable operational impact. With the ongoing trend toward higher core
> counts per machine, this cost becomes even more noticeable over time,
> since interrupt counters are per-CPU. In Meta's fleet, we have observed
> this overhead at scale.
>
> Although a binary /proc interface would be a better long-term solution
> due to lower formatting (kernel side) and parsing (userspace side)
> overhead, the text interface will remain in use for some time, even if
> better solutions will be available. Optimizing the /proc/interrupts
> printing code is therefore still beneficial.
>
> Function seq_printf() supports rich format string for decimals printing,
> but that is not required for printing /proc/interrupts per CPU counters;
> seq_put_decimal_ull_width() function can be used instead to print per
> CPU counters, because very limited formatting is required for this case.
> Similar optimization idea is already used in show_interrupts().
>
> Performance counter stats (truncated) for 'sh -c cat /proc/interrupts
>
> Before:
>
> 3.42 msec task-clock # 0.802 CPUs utilized ( +- 0.05% )
> 1 context-switches # 291.991 /sec ( +- 0.74% )
> 0 cpu-migrations # 0.000 /sec
> 343 page-faults # 100.153 K/sec ( +- 0.01% )
> 8,932,242 instructions # 1.66 insn per cycle ( +- 0.34% )
> 5,374,427 cycles # 1.569 GHz ( +- 0.04% )
> 1,483,154 branches # 433.068 M/sec ( +- 0.22% )
> 28,768 branch-misses # 1.94% of all branches ( +- 0.31% )
>
> 0.00427182 +- 0.00000215 seconds time elapsed ( +- 0.05% )
>
> After:
>
> 2.39 msec task-clock # 0.796 CPUs utilized ( +- 0.06% )
> 1 context-switches # 418.541 /sec ( +- 0.70% )
> 0 cpu-migrations # 0.000 /sec
> 343 page-faults # 143.560 K/sec ( +- 0.01% )
> 7,020,982 instructions # 1.30 insn per cycle ( +- 0.52% )
> 5,397,266 cycles # 2.259 GHz ( +- 0.06% )
> 1,569,648 branches # 656.962 M/sec ( +- 0.08% )
> 25,419 branch-misses # 1.62% of all branches ( +- 0.72% )
>
> 0.00299996 +- 0.00000206 seconds time elapsed ( +- 0.07% )
>
> Relative speed up in time elapsed is around 29%.
>
> [ tglx: Fixed it up so it applies to current mainline ]
>
> Signed-off-by: Dmitry Ilvokhin <d@ilvokhin.com>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Link: https://patch.msgid.link/aQj5mGZ6_BBlAm3B@shell.ilvokhin.com
>
> ---
> Changes v2:
> - Expanded commit message: add more rationale for the proposed change.
> - Renamed helper put_spaced_decimal() -> put_decimal() primarily to make
> checkpatch.pl --strict pass.
>
> arch/x86/kernel/irq.c | 112 ++++++++++++++++++++++++++------------------------
> 1 file changed, 59 insertions(+), 53 deletions(-)
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -62,6 +62,18 @@ void ack_bad_irq(unsigned int irq)
> apic_eoi();
> }
>
> +/*
> + * A helper routine for putting space and decimal number without overhead
> + * from rich format of printf().
> + */
> +static void put_decimal(struct seq_file *p, unsigned long long num)
> +{
> + const char *delimiter = " ";
> + unsigned int width = 10;
> +
> + seq_put_decimal_ull_width(p, delimiter, num, width);
> +}
> +
> #define irq_stats(x) (&per_cpu(irq_stat, x))
> /*
> * /proc/interrupts printing for arch specific interrupts
> @@ -70,103 +82,101 @@ int arch_show_interrupts(struct seq_file
> {
> int j;
>
> - seq_printf(p, "%*s: ", prec, "NMI");
> + seq_printf(p, "%*s:", prec, "NMI");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
> + put_decimal(p, irq_stats(j)->__nmi_count);
> seq_puts(p, " Non-maskable interrupts\n");
> #ifdef CONFIG_X86_LOCAL_APIC
> - seq_printf(p, "%*s: ", prec, "LOC");
> + seq_printf(p, "%*s:", prec, "LOC");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
> + put_decimal(p, irq_stats(j)->apic_timer_irqs);
> seq_puts(p, " Local timer interrupts\n");
>
> - seq_printf(p, "%*s: ", prec, "SPU");
> + seq_printf(p, "%*s:", prec, "SPU");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
> + put_decimal(p, irq_stats(j)->irq_spurious_count);
> seq_puts(p, " Spurious interrupts\n");
> - seq_printf(p, "%*s: ", prec, "PMI");
> + seq_printf(p, "%*s:", prec, "PMI");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
> + put_decimal(p, irq_stats(j)->apic_perf_irqs);
> seq_puts(p, " Performance monitoring interrupts\n");
> - seq_printf(p, "%*s: ", prec, "IWI");
> + seq_printf(p, "%*s:", prec, "IWI");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
> + put_decimal(p, irq_stats(j)->apic_irq_work_irqs);
> seq_puts(p, " IRQ work interrupts\n");
> - seq_printf(p, "%*s: ", prec, "RTR");
> + seq_printf(p, "%*s:", prec, "RTR");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->icr_read_retry_count);
> + put_decimal(p, irq_stats(j)->icr_read_retry_count);
> seq_puts(p, " APIC ICR read retries\n");
> if (x86_platform_ipi_callback) {
> - seq_printf(p, "%*s: ", prec, "PLT");
> + seq_printf(p, "%*s:", prec, "PLT");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
> + put_decimal(p, irq_stats(j)->x86_platform_ipis);
> seq_puts(p, " Platform interrupts\n");
> }
> #endif
> #ifdef CONFIG_SMP
> - seq_printf(p, "%*s: ", prec, "RES");
> + seq_printf(p, "%*s:", prec, "RES");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
> + put_decimal(p, irq_stats(j)->irq_resched_count);
> seq_puts(p, " Rescheduling interrupts\n");
> - seq_printf(p, "%*s: ", prec, "CAL");
> + seq_printf(p, "%*s:", prec, "CAL");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
> + put_decimal(p, irq_stats(j)->irq_call_count);
> seq_puts(p, " Function call interrupts\n");
> - seq_printf(p, "%*s: ", prec, "TLB");
> + seq_printf(p, "%*s:", prec, "TLB");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
> + put_decimal(p, irq_stats(j)->irq_tlb_count);
> seq_puts(p, " TLB shootdowns\n");
> #endif
> #ifdef CONFIG_X86_THERMAL_VECTOR
> - seq_printf(p, "%*s: ", prec, "TRM");
> + seq_printf(p, "%*s:", prec, "TRM");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
> + put_decimal(p, irq_stats(j)->irq_thermal_count);
> seq_puts(p, " Thermal event interrupts\n");
> #endif
> #ifdef CONFIG_X86_MCE_THRESHOLD
> - seq_printf(p, "%*s: ", prec, "THR");
> + seq_printf(p, "%*s:", prec, "THR");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
> + put_decimal(p, irq_stats(j)->irq_threshold_count);
> seq_puts(p, " Threshold APIC interrupts\n");
> #endif
> #ifdef CONFIG_X86_MCE_AMD
> - seq_printf(p, "%*s: ", prec, "DFR");
> + seq_printf(p, "%*s:", prec, "DFR");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->irq_deferred_error_count);
> + put_decimal(p, irq_stats(j)->irq_deferred_error_count);
> seq_puts(p, " Deferred Error APIC interrupts\n");
> #endif
> #ifdef CONFIG_X86_MCE
> - seq_printf(p, "%*s: ", prec, "MCE");
> + seq_printf(p, "%*s:", prec, "MCE");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
> + put_decimal(p, per_cpu(mce_exception_count, j));
> seq_puts(p, " Machine check exceptions\n");
> - seq_printf(p, "%*s: ", prec, "MCP");
> + seq_printf(p, "%*s:", prec, "MCP");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
> + put_decimal(p, per_cpu(mce_poll_count, j));
> seq_puts(p, " Machine check polls\n");
> #endif
> #ifdef CONFIG_X86_HV_CALLBACK_VECTOR
> if (test_bit(HYPERVISOR_CALLBACK_VECTOR, system_vectors)) {
> - seq_printf(p, "%*s: ", prec, "HYP");
> + seq_printf(p, "%*s:", prec, "HYP");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->irq_hv_callback_count);
> + put_decimal(p, irq_stats(j)->irq_hv_callback_count);
> seq_puts(p, " Hypervisor callback interrupts\n");
> }
> #endif
> #if IS_ENABLED(CONFIG_HYPERV)
> if (test_bit(HYPERV_REENLIGHTENMENT_VECTOR, system_vectors)) {
> - seq_printf(p, "%*s: ", prec, "HRE");
> + seq_printf(p, "%*s:", prec, "HRE");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->irq_hv_reenlightenment_count);
> + put_decimal(p,
> + irq_stats(j)->irq_hv_reenlightenment_count);
> seq_puts(p, " Hyper-V reenlightenment interrupts\n");
> }
> if (test_bit(HYPERV_STIMER0_VECTOR, system_vectors)) {
> - seq_printf(p, "%*s: ", prec, "HVS");
> + seq_printf(p, "%*s:", prec, "HVS");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->hyperv_stimer0_count);
> + put_decimal(p, irq_stats(j)->hyperv_stimer0_count);
> seq_puts(p, " Hyper-V stimer0 interrupts\n");
> }
> #endif
> @@ -175,35 +185,31 @@ int arch_show_interrupts(struct seq_file
> seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
> #endif
> #if IS_ENABLED(CONFIG_KVM)
> - seq_printf(p, "%*s: ", prec, "PIN");
> + seq_printf(p, "%*s:", prec, "PIN");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ", irq_stats(j)->kvm_posted_intr_ipis);
> + put_decimal(p, irq_stats(j)->kvm_posted_intr_ipis);
> seq_puts(p, " Posted-interrupt notification event\n");
>
> - seq_printf(p, "%*s: ", prec, "NPI");
> + seq_printf(p, "%*s:", prec, "NPI");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->kvm_posted_intr_nested_ipis);
> + put_decimal(p, irq_stats(j)->kvm_posted_intr_nested_ipis);
> seq_puts(p, " Nested posted-interrupt event\n");
>
> - seq_printf(p, "%*s: ", prec, "PIW");
> + seq_printf(p, "%*s:", prec, "PIW");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->kvm_posted_intr_wakeup_ipis);
> + put_decimal(p, irq_stats(j)->kvm_posted_intr_wakeup_ipis);
> seq_puts(p, " Posted-interrupt wakeup event\n");
> #endif
> #ifdef CONFIG_GUEST_PERF_EVENTS
> - seq_printf(p, "%*s: ", prec, "VPMI");
> + seq_printf(p, "%*s:", prec, "VPMI");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->perf_guest_mediated_pmis);
> + put_decimal(p, irq_stats(j)->perf_guest_mediated_pmis);
> seq_puts(p, " Perf Guest Mediated PMI\n");
> #endif
> #ifdef CONFIG_X86_POSTED_MSI
> - seq_printf(p, "%*s: ", prec, "PMN");
> + seq_printf(p, "%*s:", prec, "PMN");
> for_each_online_cpu(j)
> - seq_printf(p, "%10u ",
> - irq_stats(j)->posted_msi_notification_count);
> + put_decimal(p, irq_stats(j)->posted_msi_notification_count);
> seq_puts(p, " Posted MSI notification event\n");
> #endif
> return 0;
Nit: The patch changes the alignment of the descriptions by 1 space (it
moves the descriptions to the left). If that's intentional, perhaps it
should be added to the description? TBH, I like it better like that
because the description is now aligned with the generic interrupts.
Reviewed-by: Radu Rendec <radu@rendec.net>
^ permalink raw reply [flat|nested] 43+ messages in thread
* RE: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-20 16:39 ` Michael Kelley
@ 2026-03-21 16:38 ` Thomas Gleixner
2026-03-21 20:32 ` Michael Kelley
0 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-21 16:38 UTC (permalink / raw)
To: Michael Kelley, LKML
Cc: x86@kernel.org, Dmitry Ilvokhin, Neil Horman, Radu Rendec
On Fri, Mar 20 2026 at 16:39, Michael Kelley wrote:
> From: Thomas Gleixner <tglx@kernel.org> Sent: Friday, March 20, 2026 6:22 AM
>> - seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
>> -#if defined(CONFIG_X86_IO_APIC)
>> - seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
>> + ITS(HYPERV_REENLIGHTENMENT, "HRE", " Hyper-V reenlightment interrupts\n"),
>
> There's still a typo in the description string:
>
> s/reenlightment/reenlightenment/
I clearly suffer from dyslexia.
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 02/14] genirq/proc: Avoid formatting zero counts in /proc/interrupts
2026-03-20 13:21 ` [patch v2 02/14] genirq/proc: Avoid formatting zero counts in /proc/interrupts Thomas Gleixner
@ 2026-03-21 16:38 ` Radu Rendec
0 siblings, 0 replies; 43+ messages in thread
From: Radu Rendec @ 2026-03-21 16:38 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
> A large portion of interrupt count entries are zero. There is no point in
> formatting the zero value as it is way cheaper to just emit a constant
> string.
>
> Collect the number of consecutive zero counts and emit them in one go
> before a non-zero count and at the end of the line.
>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
> ---
> V2: Use sizeof() for ZSTR1_LEN - Dmitry
> ---
> include/linux/interrupt.h | 1 +
> kernel/irq/proc.c | 42 +++++++++++++++++++++++++++++++++++++-----
> 2 files changed, 38 insertions(+), 5 deletions(-)
>
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -864,6 +864,7 @@ static inline void init_irq_proc(void)
> struct seq_file;
> int show_interrupts(struct seq_file *p, void *v);
> int arch_show_interrupts(struct seq_file *p, int prec);
> +void irq_proc_emit_counts(struct seq_file *p, unsigned int __percpu *cnts);
>
> extern int early_irq_init(void);
> extern int arch_probe_nr_irqs(void);
> --- a/kernel/irq/proc.c
> +++ b/kernel/irq/proc.c
> @@ -450,6 +450,42 @@ int __weak arch_show_interrupts(struct s
> # define ACTUAL_NR_IRQS irq_get_nr_irqs()
> #endif
>
> +#define ZSTR1 " 0"
> +#define ZSTR1_LEN (sizeof(ZSTR1) - 1)
> +#define ZSTR16 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 \
> + ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1
> +#define ZSTR256 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 \
> + ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16 ZSTR16
> +
> +static inline void irq_proc_emit_zero_counts(struct seq_file *p, unsigned int zeros)
> +{
> + if (!zeros)
> + return;
> +
> + for (unsigned int n = min(zeros, 256); n; zeros -= n, n = min(zeros, 256))
> + seq_write(p, ZSTR256, n * ZSTR1_LEN);
> +}
> +
> +static inline unsigned int irq_proc_emit_count(struct seq_file *p, unsigned int cnt,
> + unsigned int zeros)
> +{
> + if (!cnt)
> + return zeros + 1;
> +
> + irq_proc_emit_zero_counts(p, zeros);
> + seq_put_decimal_ull_width(p, " ", cnt, 10);
> + return 0;
> +}
> +
> +void irq_proc_emit_counts(struct seq_file *p, unsigned int __percpu *cnts)
> +{
> + unsigned int cpu, zeros = 0;
> +
> + for_each_online_cpu(cpu)
> + zeros = irq_proc_emit_count(p, per_cpu(*cnts, cpu), zeros);
> + irq_proc_emit_zero_counts(p, zeros);
> +}
> +
> int show_interrupts(struct seq_file *p, void *v)
> {
> const unsigned int nr_irqs = irq_get_nr_irqs();
> @@ -485,11 +521,7 @@ int show_interrupts(struct seq_file *p,
> return 0;
>
> seq_printf(p, "%*d:", prec, i);
> - for_each_online_cpu(j) {
> - unsigned int cnt = desc->kstat_irqs ? per_cpu(desc->kstat_irqs->cnt, j) : 0;
> -
> - seq_put_decimal_ull_width(p, " ", cnt, 10);
> - }
> + irq_proc_emit_counts(p, &desc->kstat_irqs->cnt);
> seq_putc(p, ' ');
>
> guard(raw_spinlock_irq)(&desc->lock);
Reviewed-by: Radu Rendec <radu@rendec.net>
^ permalink raw reply [flat|nested] 43+ messages in thread
* RE: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-21 16:38 ` Thomas Gleixner
@ 2026-03-21 20:32 ` Michael Kelley
0 siblings, 0 replies; 43+ messages in thread
From: Michael Kelley @ 2026-03-21 20:32 UTC (permalink / raw)
To: Thomas Gleixner, Michael Kelley, LKML
Cc: x86@kernel.org, Dmitry Ilvokhin, Neil Horman, Radu Rendec
From: Thomas Gleixner <tglx@kernel.org> Sent: Saturday, March 21, 2026 9:38 AM
>
> On Fri, Mar 20 2026 at 16:39, Michael Kelley wrote:
> > From: Thomas Gleixner <tglx@kernel.org> Sent: Friday, March 20, 2026 6:22 AM
> >> - seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
> >> -#if defined(CONFIG_X86_IO_APIC)
> >> - seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
> >> + ITS(HYPERV_REENLIGHTENMENT, "HRE", " Hyper-V reenlightment interrupts\n"),
> >
> > There's still a typo in the description string:
> >
> > s/reenlightment/reenlightenment/
>
> I clearly suffer from dyslexia.
It's still a terrible word. I have a hard time typing it even when
focused on it. :-( But it is what it is. I'm hoping its raison
d'etre will have timed out in a few years and we can
deprecate the entire reenlightenment mechanism.
Michael
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 03/14] genirq/proc: Utilize irq_desc::tot_count to avoid evaluation
2026-03-20 13:21 ` [patch v2 03/14] genirq/proc: Utilize irq_desc::tot_count to avoid evaluation Thomas Gleixner
@ 2026-03-22 19:59 ` Radu Rendec
0 siblings, 0 replies; 43+ messages in thread
From: Radu Rendec @ 2026-03-22 19:59 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
> Interrupts which are not marked per CPU increment not only the per CPU
> statistics, but also the accumulation counter irq_desc::tot_count.
>
> Change the counter to type unsigned long so it does not produce sporadic
> zeros due to wrap arounds on 64-bit machines and do a quick check for non
> per CPU interrupts. If the counter is zero, then simply emit a full set of
> zero strings. That spares the evaluation of the per CPU counters completely
> for interrupts with zero events.
>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
> ---
> include/linux/irqdesc.h | 6 +++---
> kernel/irq/proc.c | 11 ++++++++++-
> 2 files changed, 13 insertions(+), 4 deletions(-)
>
> --- a/include/linux/irqdesc.h
> +++ b/include/linux/irqdesc.h
> @@ -52,8 +52,8 @@ struct irq_redirect {
> * @depth: disable-depth, for nested irq_disable() calls
> * @wake_depth: enable depth, for multiple irq_set_irq_wake() callers
> * @tot_count: stats field for non-percpu irqs
> - * @irq_count: stats field to detect stalled irqs
> * @last_unhandled: aging timer for unhandled count
> + * @irq_count: stats field to detect stalled irqs
> * @irqs_unhandled: stats field for spurious unhandled interrupts
> * @threads_handled: stats field for deferred spurious detection of threaded handlers
> * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
> @@ -87,9 +87,9 @@ struct irq_desc {
> unsigned int core_internal_state__do_not_mess_with_it;
> unsigned int depth; /* nested irq disables */
> unsigned int wake_depth; /* nested wake enables */
> - unsigned int tot_count;
> - unsigned int irq_count; /* For detecting broken IRQs */
> + unsigned long tot_count;
> unsigned long last_unhandled; /* Aging timer for unhandled count */
> + unsigned int irq_count; /* For detecting broken IRQs */
> unsigned int irqs_unhandled;
> atomic_t threads_handled;
> int threads_handled_last;
> --- a/kernel/irq/proc.c
> +++ b/kernel/irq/proc.c
> @@ -521,7 +521,16 @@ int show_interrupts(struct seq_file *p,
> return 0;
>
> seq_printf(p, "%*d:", prec, i);
> - irq_proc_emit_counts(p, &desc->kstat_irqs->cnt);
> +
> + /*
> + * Always output per CPU interrupts. Output device interrupts only when
> + * desc::tot_count is not zero.
> + */
> + if (irq_settings_is_per_cpu(desc) || irq_settings_is_per_cpu_devid(desc) ||
> + data_race(desc->tot_count))
> + irq_proc_emit_counts(p, &desc->kstat_irqs->cnt);
> + else
> + irq_proc_emit_zero_counts(p, num_online_cpus());
> seq_putc(p, ' ');
>
> guard(raw_spinlock_irq)(&desc->lock);
Reviewed-by: Radu Rendec <radu@rendec.net>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-20 13:21 ` [patch v2 04/14] x86/irq: Make irqstats array based Thomas Gleixner
2026-03-20 16:39 ` Michael Kelley
@ 2026-03-23 19:24 ` Radu Rendec
2026-03-24 19:54 ` Thomas Gleixner
1 sibling, 1 reply; 43+ messages in thread
From: Radu Rendec @ 2026-03-23 19:24 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
> Having the x86 specific interrupt statistics as a data structure with
> individual members instead of an array is just stupid as it requires
> endless copy and paste in arch_show_interrupts() and arch_irq_stat_cpu(),
> where the latter does not even take the latest interrupt additions into
> account. The resulting #ifdef orgy is just disgusting.
>
> Convert it to an array of counters, which does not make a difference in the
> actual interrupt hotpath increment as the array index is constant and
> therefore not any different than the member based access.
>
> But in arch_show_interrupts() and arch_irq_stat_cpu() this just turns into
> a loop, which reduces the text size by ~2k (~12%):
>
> text data bss dec hex filename
> 19643 15250 904 35797 8bd5 ../build/arch/x86/kernel/irq.o
> 17355 15250 904 33509 82e5 ../build/arch/x86/kernel/irq.o
>
> Adding a new vector or software counter only requires to update the table
> and everything just works. Using the core provided emit function which
> speeds up 0 outputs makes it significantly faster.
>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> ---
> V2: Simplified and extended vector skip mechanism
> Fixup the typos - Michael, Dmitry
> Added the lost precision back for ERR/MIS - Dmitry
> ---
In the interest of keeping this email readable, I removed the diff part
and I'm commenting below every file in the diffstat section.
> arch/x86/events/amd/core.c | 2
> arch/x86/events/amd/ibs.c | 2
> arch/x86/events/core.c | 2
> arch/x86/events/intel/core.c | 2
> arch/x86/events/intel/knc.c | 2
> arch/x86/events/intel/p4.c | 2
> arch/x86/events/zhaoxin/core.c | 2
> arch/x86/hyperv/hv_init.c | 2
> arch/x86/include/asm/hardirq.h | 69 +++++-----
I believe this breaks scripts/gdb/linux/interrupts.py, particularly the
x86_show_irqstat() function, which still expects individual members in
struct irq_cpustat_t.
> arch/x86/include/asm/irq.h | 2
> arch/x86/include/asm/mce.h | 3
> arch/x86/kernel/apic/apic.c | 4
> arch/x86/kernel/apic/ipi.c | 2
> arch/x86/kernel/cpu/acrn.c | 2
> arch/x86/kernel/cpu/mce/amd.c | 2
> arch/x86/kernel/cpu/mce/core.c | 8 -
> arch/x86/kernel/cpu/mce/threshold.c | 2
> arch/x86/kernel/cpu/mshyperv.c | 4
> arch/x86/kernel/irq.c | 247 ++++++++++++------------------------
Nit: This moves the description text by one more space to the left (in
addition to the one in patch 1), and now the description is no longer
aligned to the generic interrupts.
> arch/x86/kernel/irq_work.c | 2
> arch/x86/kernel/irqinit.c | 2
> arch/x86/kernel/kvm.c | 2
> arch/x86/kernel/nmi.c | 4
> arch/x86/kernel/smp.c | 6
> arch/x86/mm/tlb.c | 2
> arch/x86/xen/enlighten_hvm.c | 2
> arch/x86/xen/enlighten_pv.c | 2
> arch/x86/xen/smp.c | 6
> arch/x86/xen/smp_pv.c | 2
> 29 files changed, 158 insertions(+), 233 deletions(-)
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 05/14] genirq: Expose nr_irqs in core code
2026-03-20 13:21 ` [patch v2 05/14] genirq: Expose nr_irqs in core code Thomas Gleixner
@ 2026-03-23 19:48 ` Radu Rendec
2026-03-23 21:27 ` Thomas Gleixner
0 siblings, 1 reply; 43+ messages in thread
From: Radu Rendec @ 2026-03-23 19:48 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
> ... to avoid function calls in the core code to retrieve the maximum number
> of interrupts.
>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
> ---
> kernel/irq/internals.h | 1 +
> kernel/irq/irqdesc.c | 28 ++++++++++++++--------------
> kernel/irq/proc.c | 2 +-
> 3 files changed, 16 insertions(+), 15 deletions(-)
>
> --- a/kernel/irq/internals.h
> +++ b/kernel/irq/internals.h
> @@ -21,6 +21,7 @@
>
> extern bool noirqdebug;
> extern int irq_poll_cpu;
> +extern unsigned int total_nr_irqs;
>
> extern struct irqaction chained_action;
>
> --- a/kernel/irq/irqdesc.c
> +++ b/kernel/irq/irqdesc.c
> @@ -140,14 +140,14 @@ static void desc_set_defaults(unsigned i
> desc_smp_init(desc, node, affinity);
> }
>
> -static unsigned int nr_irqs = NR_IRQS;
> +unsigned int total_nr_irqs __read_mostly = NR_IRQS;
>
> /**
> * irq_get_nr_irqs() - Number of interrupts supported by the system.
> */
> unsigned int irq_get_nr_irqs(void)
> {
> - return nr_irqs;
> + return total_nr_irqs;
> }
> EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
>
> @@ -159,7 +159,7 @@ EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
> */
> unsigned int irq_set_nr_irqs(unsigned int nr)
> {
> - nr_irqs = nr;
> + total_nr_irqs = nr;
>
> return nr;
> }
> @@ -187,9 +187,9 @@ static unsigned int irq_find_at_or_after
> struct irq_desc *desc;
>
> guard(rcu)();
> - desc = mt_find(&sparse_irqs, &index, nr_irqs);
> + desc = mt_find(&sparse_irqs, &index, total_nr_irqs);
>
> - return desc ? irq_desc_get_irq(desc) : nr_irqs;
> + return desc ? irq_desc_get_irq(desc) : total_nr_irqs;
> }
>
> static void irq_insert_desc(unsigned int irq, struct irq_desc *desc)
> @@ -543,7 +543,7 @@ static bool irq_expand_nr_irqs(unsigned
> {
> if (nr > MAX_SPARSE_IRQS)
> return false;
> - nr_irqs = nr;
> + total_nr_irqs = nr;
> return true;
> }
>
> @@ -557,16 +557,16 @@ int __init early_irq_init(void)
> /* Let arch update nr_irqs and return the nr of preallocated irqs */
> initcnt = arch_probe_nr_irqs();
> printk(KERN_INFO "NR_IRQS: %d, nr_irqs: %d, preallocated irqs: %d\n",
> - NR_IRQS, nr_irqs, initcnt);
> + NR_IRQS, total_nr_irqs, initcnt);
>
> - if (WARN_ON(nr_irqs > MAX_SPARSE_IRQS))
> - nr_irqs = MAX_SPARSE_IRQS;
> + if (WARN_ON(total_nr_irqs > MAX_SPARSE_IRQS))
> + total_nr_irqs = MAX_SPARSE_IRQS;
>
> if (WARN_ON(initcnt > MAX_SPARSE_IRQS))
> initcnt = MAX_SPARSE_IRQS;
>
> - if (initcnt > nr_irqs)
> - nr_irqs = initcnt;
> + if (initcnt > total_nr_irqs)
> + total_nr_irqs = initcnt;
>
> for (i = 0; i < initcnt; i++) {
> desc = alloc_desc(i, node, 0, NULL, NULL);
> @@ -862,7 +862,7 @@ void irq_free_descs(unsigned int from, u
> {
> int i;
>
> - if (from >= nr_irqs || (from + cnt) > nr_irqs)
> + if (from >= total_nr_irqs || (from + cnt) > total_nr_irqs)
> return;
>
> guard(mutex)(&sparse_irq_lock);
> @@ -911,7 +911,7 @@ int __ref __irq_alloc_descs(int irq, uns
> if (irq >=0 && start != irq)
> return -EEXIST;
>
> - if (start + cnt > nr_irqs) {
> + if (start + cnt > total_nr_irqs) {
> if (!irq_expand_nr_irqs(start + cnt))
> return -ENOMEM;
> }
> @@ -923,7 +923,7 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs);
> * irq_get_next_irq - get next allocated irq number
> * @offset: where to start the search
> *
> - * Returns next irq number after offset or nr_irqs if none is found.
> + * Returns next irq number after offset or total_nr_irqs if none is found.
> */
> unsigned int irq_get_next_irq(unsigned int offset)
> {
> --- a/kernel/irq/proc.c
> +++ b/kernel/irq/proc.c
> @@ -447,7 +447,7 @@ int __weak arch_show_interrupts(struct s
> }
>
> #ifndef ACTUAL_NR_IRQS
> -# define ACTUAL_NR_IRQS irq_get_nr_irqs()
> +# define ACTUAL_NR_IRQS total_nr_irqs
> #endif
>
> #define ZSTR1 " 0"
Nit: This is modified again in the next patch, so it doesn't matter.
But for the purpose of this patch in isolation, looking at where
ACTUAL_NR_IRQS is used, in show_interrupts() the nr_irqs variable
should also be initialized to total_nr_irqs to avoid the extra function
call.
Reviewed-by: Radu Rendec <radu@rendec.net>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 06/14] genirq: Cache the condition for /proc/interrupts exposure
2026-03-20 13:21 ` [patch v2 06/14] genirq: Cache the condition for /proc/interrupts exposure Thomas Gleixner
@ 2026-03-23 20:58 ` Radu Rendec
2026-03-24 20:31 ` Thomas Gleixner
0 siblings, 1 reply; 43+ messages in thread
From: Radu Rendec @ 2026-03-23 20:58 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
> show_interrupts() evaluates a boatload of conditions to establish whether
> exposing an interrupt in /proc/interrupts or not.
>
> That can be simplified by caching the condition in an internal status flag,
> which is updated when one of the relevant inputs changes.
>
> As a result the number of instructions and branches for reading
> /proc/interrupts is reduced significantly.
>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
> ---
> V2: s/IRQF_/IRQ/ and fixup the enum treatment - Dmitry
> ---
> include/linux/irq.h | 1 +
> kernel/irq/chip.c | 2 ++
> kernel/irq/internals.h | 2 ++
> kernel/irq/manage.c | 2 ++
> kernel/irq/proc.c | 16 ++++++++++++----
> kernel/irq/settings.h | 13 +++++++++++++
> 6 files changed, 32 insertions(+), 4 deletions(-)
>
> --- a/include/linux/irq.h
> +++ b/include/linux/irq.h
> @@ -99,6 +99,7 @@ enum {
> IRQ_DISABLE_UNLAZY = (1 << 19),
> IRQ_HIDDEN = (1 << 20),
> IRQ_NO_DEBUG = (1 << 21),
> + IRQ_RESERVED = (1 << 22),
> };
>
> #define IRQF_MODIFY_MASK \
> --- a/kernel/irq/chip.c
> +++ b/kernel/irq/chip.c
> @@ -1004,6 +1004,7 @@ static void
> WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc)));
> irq_activate_and_startup(desc, IRQ_RESEND);
> }
> + irq_proc_update_valid(desc);
> }
>
> void __irq_set_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained,
> @@ -1064,6 +1065,7 @@ void irq_modify_status(unsigned int irq,
> trigger = tmp;
>
> irqd_set(&desc->irq_data, trigger);
> + irq_proc_update_valid(desc);
> }
> }
> EXPORT_SYMBOL_GPL(irq_modify_status);
> --- a/kernel/irq/internals.h
> +++ b/kernel/irq/internals.h
> @@ -123,6 +123,7 @@ extern void register_irq_proc(unsigned i
> extern void unregister_irq_proc(unsigned int irq, struct irq_desc *desc);
> extern void register_handler_proc(unsigned int irq, struct irqaction *action);
> extern void unregister_handler_proc(unsigned int irq, struct irqaction *action);
> +void irq_proc_update_valid(struct irq_desc *desc);
> #else
> static inline void register_irq_proc(unsigned int irq, struct irq_desc *desc) { }
> static inline void unregister_irq_proc(unsigned int irq, struct irq_desc *desc) { }
> @@ -130,6 +131,7 @@ static inline void register_handler_proc
> struct irqaction *action) { }
> static inline void unregister_handler_proc(unsigned int irq,
> struct irqaction *action) { }
> +static inline void irq_proc_update_valid(struct irq_desc *desc) { }
> #endif
>
> extern bool irq_can_set_affinity_usr(unsigned int irq);
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -1802,6 +1802,7 @@ static int
> __enable_irq(desc);
> }
>
> + irq_proc_update_valid(desc);
> raw_spin_unlock_irqrestore(&desc->lock, flags);
> chip_bus_sync_unlock(desc);
> mutex_unlock(&desc->request_mutex);
> @@ -1906,6 +1907,7 @@ static struct irqaction *__free_irq(stru
> desc->affinity_hint = NULL;
> #endif
>
> + irq_proc_update_valid(desc);
> raw_spin_unlock_irqrestore(&desc->lock, flags);
> /*
> * Drop bus_lock here so the changes which were done in the chip
> --- a/kernel/irq/proc.c
> +++ b/kernel/irq/proc.c
> @@ -439,6 +439,17 @@ void init_irq_proc(void)
> register_irq_proc(irq, desc);
> }
>
> +void irq_proc_update_valid(struct irq_desc *desc)
> +{
> + u32 set = _IRQ_PROC_VALID;
> +
> + if (irq_settings_is_hidden(desc) || !desc->action ||
> + irq_desc_is_chained(desc) || !desc->kstat_irqs)
Can desc->kstat_irqs ever be NULL? Looking at kernel/irq/irqdesc.c, it
seems to me that it's allocated always and very early (before every
other field in struct irq_desc), and with an explicit check for NULL on
allocation. It's also deallocated late, right before struct irq_desc
itself is deallocated (in irq_kobj_release()).
Other than that, LGTM.
> + set = 0;
> +
> + irq_settings_update_proc_valid(desc, set);
> +}
> +
> #ifdef CONFIG_GENERIC_IRQ_SHOW
>
> int __weak arch_show_interrupts(struct seq_file *p, int prec)
> @@ -514,10 +525,7 @@ int show_interrupts(struct seq_file *p,
>
> guard(rcu)();
> desc = irq_to_desc(i);
> - if (!desc || irq_settings_is_hidden(desc))
> - return 0;
> -
> - if (!desc->action || irq_desc_is_chained(desc) || !desc->kstat_irqs)
> + if (!desc || !irq_settings_proc_valid(desc))
> return 0;
>
> seq_printf(p, "%*d:", prec, i);
> --- a/kernel/irq/settings.h
> +++ b/kernel/irq/settings.h
> @@ -18,6 +18,7 @@ enum {
> _IRQ_DISABLE_UNLAZY = IRQ_DISABLE_UNLAZY,
> _IRQ_HIDDEN = IRQ_HIDDEN,
> _IRQ_NO_DEBUG = IRQ_NO_DEBUG,
> + _IRQ_PROC_VALID = IRQ_RESERVED,
> _IRQF_MODIFY_MASK = IRQF_MODIFY_MASK,
> };
>
> @@ -34,6 +35,7 @@ enum {
> #define IRQ_DISABLE_UNLAZY GOT_YOU_MORON
> #define IRQ_HIDDEN GOT_YOU_MORON
> #define IRQ_NO_DEBUG GOT_YOU_MORON
> +#define IRQ_RESERVED GOT_YOU_MORON
> #undef IRQF_MODIFY_MASK
> #define IRQF_MODIFY_MASK GOT_YOU_MORON
>
> @@ -180,3 +182,14 @@ static inline bool irq_settings_no_debug
> {
> return desc->status_use_accessors & _IRQ_NO_DEBUG;
> }
> +
> +static inline bool irq_settings_proc_valid(struct irq_desc *desc)
> +{
> + return desc->status_use_accessors & _IRQ_PROC_VALID;
> +}
> +
> +static inline void irq_settings_update_proc_valid(struct irq_desc *desc, u32 set)
> +{
> + desc->status_use_accessors &= ~_IRQ_PROC_VALID;
> + desc->status_use_accessors |= (set & _IRQ_PROC_VALID);
> +}
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 05/14] genirq: Expose nr_irqs in core code
2026-03-23 19:48 ` Radu Rendec
@ 2026-03-23 21:27 ` Thomas Gleixner
0 siblings, 0 replies; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-23 21:27 UTC (permalink / raw)
To: Radu Rendec, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On Mon, Mar 23 2026 at 15:48, Radu Rendec wrote:
> On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
>> #ifndef ACTUAL_NR_IRQS
>> -# define ACTUAL_NR_IRQS irq_get_nr_irqs()
>> +# define ACTUAL_NR_IRQS total_nr_irqs
>> #endif
>>
>> #define ZSTR1 " 0"
>
> Nit: This is modified again in the next patch, so it doesn't matter.
> But for the purpose of this patch in isolation, looking at where
> ACTUAL_NR_IRQS is used, in show_interrupts() the nr_irqs variable
> should also be initialized to total_nr_irqs to avoid the extra function
> call.
Indeed.
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-23 19:24 ` Radu Rendec
@ 2026-03-24 19:54 ` Thomas Gleixner
2026-03-24 20:21 ` Thomas Gleixner
0 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-24 19:54 UTC (permalink / raw)
To: Radu Rendec, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On Mon, Mar 23 2026 at 15:24, Radu Rendec wrote:
> On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
>> arch/x86/include/asm/hardirq.h | 69 +++++-----
>
> I believe this breaks scripts/gdb/linux/interrupts.py, particularly the
> x86_show_irqstat() function, which still expects individual members in
> struct irq_cpustat_t.
Uurg. I did not even know this exists. Let me try to polish the snake.
>> arch/x86/kernel/irq.c | 247 ++++++++++++------------------------
>
> Nit: This moves the description text by one more space to the left (in
> addition to the one in patch 1), and now the description is no longer
> aligned to the generic interrupts.
Fixed.
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-24 19:54 ` Thomas Gleixner
@ 2026-03-24 20:21 ` Thomas Gleixner
2026-03-24 20:32 ` Radu Rendec
0 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-24 20:21 UTC (permalink / raw)
To: Radu Rendec, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Florian Fainelli
On Tue, Mar 24 2026 at 20:54, Thomas Gleixner wrote:
> On Mon, Mar 23 2026 at 15:24, Radu Rendec wrote:
>> On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
>>> arch/x86/include/asm/hardirq.h | 69 +++++-----
>>
>> I believe this breaks scripts/gdb/linux/interrupts.py, particularly the
>> x86_show_irqstat() function, which still expects individual members in
>> struct irq_cpustat_t.
>
> Uurg. I did not even know this exists. Let me try to polish the snake.
Something like the below should work, now I have to figure out how to
test it.
----
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -97,8 +97,8 @@ irq_desc_type = utils.CachedType("struct
text += "%*s: %10u\n" % (prec, "ERR", cnt['counter'])
return text
-def x86_show_irqstat(prec, pfx, field, desc):
- irq_stat = gdb.parse_and_eval("&irq_stat")
+def x86_show_irqstat(prec, pfx, idx, desc):
+ irq_stat = gdb.parse_and_eval("&irq_stat.counts[IRQ_COUNT_%s]" %idx)
text = "%*s: " % (prec, pfx)
for cpu in cpus.each_online_cpu():
stat = cpus.per_cpu(irq_stat, cpu)
@@ -118,32 +118,51 @@ irq_desc_type = utils.CachedType("struct
text = x86_show_irqstat(prec, "NMI", '__nmi_count', 'Non-maskable interrupts')
if constants.LX_CONFIG_X86_LOCAL_APIC:
- text += x86_show_irqstat(prec, "LOC", 'apic_timer_irqs', "Local timer interrupts")
- text += x86_show_irqstat(prec, "SPU", 'irq_spurious_count', "Spurious interrupts")
- text += x86_show_irqstat(prec, "PMI", 'apic_perf_irqs', "Performance monitoring interrupts")
- text += x86_show_irqstat(prec, "IWI", 'apic_irq_work_irqs', "IRQ work interrupts")
- text += x86_show_irqstat(prec, "RTR", 'icr_read_retry_count', "APIC ICR read retries")
+ text += x86_show_irqstat(prec, "LOC", 'APIC_TIMER', "Local timer interrupts")
+ text += x86_show_irqstat(prec, "SPU", 'SPURIOUS', "Spurious interrupts")
+ text += x86_show_irqstat(prec, "PMI", 'APIC_PERF', "Performance monitoring interrupts")
+ text += x86_show_irqstat(prec, "IWI", 'IRQ_WORK', "IRQ work interrupts")
+ text += x86_show_irqstat(prec, "RTR", 'ICR_READ_RETRY', "APIC ICR read retries")
if utils.gdb_eval_or_none("x86_platform_ipi_callback") is not None:
- text += x86_show_irqstat(prec, "PLT", 'x86_platform_ipis', "Platform interrupts")
+ text += x86_show_irqstat(prec, "PLT", 'X86_PLATFORM_IPI', "Platform interrupts")
if constants.LX_CONFIG_SMP:
- text += x86_show_irqstat(prec, "RES", 'irq_resched_count', "Rescheduling interrupts")
- text += x86_show_irqstat(prec, "CAL", 'irq_call_count', "Function call interrupts")
- text += x86_show_irqstat(prec, "TLB", 'irq_tlb_count', "TLB shootdowns")
+ text += x86_show_irqstat(prec, "RES", 'RESCHEDULE', "Rescheduling interrupts")
+ text += x86_show_irqstat(prec, "CAL", 'CALL_FUNCTION', "Function call interrupts")
+
+ text += x86_show_irqstat(prec, "TLB", 'TLB', "TLB shootdowns")
if constants.LX_CONFIG_X86_THERMAL_VECTOR:
- text += x86_show_irqstat(prec, "TRM", 'irq_thermal_count', "Thermal events interrupts")
+ text += x86_show_irqstat(prec, "TRM", 'THERMAL_APIC', "Thermal events interrupts")
if constants.LX_CONFIG_X86_MCE_THRESHOLD:
- text += x86_show_irqstat(prec, "THR", 'irq_threshold_count', "Threshold APIC interrupts")
+ text += x86_show_irqstat(prec, "THR", 'THRESHOLD_APIC', "Threshold APIC interrupts")
if constants.LX_CONFIG_X86_MCE_AMD:
- text += x86_show_irqstat(prec, "DFR", 'irq_deferred_error_count', "Deferred Error APIC interrupts")
+ text += x86_show_irqstat(prec, "DFR", 'DEFERRED_ERROR', "Deferred Error APIC interrupts")
if constants.LX_CONFIG_X86_MCE:
text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions")
text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls")
+ if constants.LX_CONFIG_X86_HV_CALLBACK_VECTOR:
+ text += x86_show_irqstat(prec, 'HYP', 'HYPERVISOR_CALLBACK', 'Hypervisor callback interrupts')
+
+ if constants.LX_CONFIG_HYPERV:
+ text += x86_show_irqstat(prec, 'HRE', 'HYPERV_REENLIGHTENMENT', 'Hyper-V reenlightenment interrupts')
+ text += x86_show_irqstat(prec, 'HVS', 'HYPERV_STIMER0', 'Hyper-V stimer0 interrupts')
+
+ if constants.LX_CONFIG_KVM:
+ text += x86_show_irqstat(prec, "PIN", 'POSTED_INTR', 'Posted-interrupt notification event')
+ text += x86_show_irqstat(prec, "NPI", 'POSTED_INTR_NESTED', 'Nested posted-interrupt event')
+ text += x86_show_irqstat(prec, "PIW", 'POSTED_INTR_WAKEUP', 'Posted-interrupt wakeup event')
+
+ if constants.LX_CONFIG_GUEST_PERF_EVENTS:
+ text += x86_show_irqstat(prec, "VPMI", 'PERF_GUEST_MEDIATED_PMI', 'Perf Guest Mediated PMI')
+
+ if constants.LX_CONFIG_X86_POSTED_MSI:
+ text += x86_show_irqstat(prec, "PIN", 'POSTED_MSI_NOTIFICATION', 'Posted MSI notification event')
+
text += show_irq_err_count(prec)
if constants.LX_CONFIG_X86_IO_APIC:
@@ -151,11 +170,6 @@ irq_desc_type = utils.CachedType("struct
if cnt is not None:
text += "%*s: %10u\n" % (prec, "MIS", cnt['counter'])
- if constants.LX_CONFIG_KVM:
- text += x86_show_irqstat(prec, "PIN", 'kvm_posted_intr_ipis', 'Posted-interrupt notification event')
- text += x86_show_irqstat(prec, "NPI", 'kvm_posted_intr_nested_ipis', 'Nested posted-interrupt event')
- text += x86_show_irqstat(prec, "PIW", 'kvm_posted_intr_wakeup_ipis', 'Posted-interrupt wakeup event')
-
return text
def arm_common_show_interrupts(prec):
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 06/14] genirq: Cache the condition for /proc/interrupts exposure
2026-03-23 20:58 ` Radu Rendec
@ 2026-03-24 20:31 ` Thomas Gleixner
2026-03-24 20:36 ` Radu Rendec
0 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-24 20:31 UTC (permalink / raw)
To: Radu Rendec, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On Mon, Mar 23 2026 at 16:58, Radu Rendec wrote:
>> +void irq_proc_update_valid(struct irq_desc *desc)
>> +{
>> + u32 set = _IRQ_PROC_VALID;
>> +
>> + if (irq_settings_is_hidden(desc) || !desc->action ||
>> + irq_desc_is_chained(desc) || !desc->kstat_irqs)
>
> Can desc->kstat_irqs ever be NULL? Looking at kernel/irq/irqdesc.c, it
> seems to me that it's allocated always and very early (before every
> other field in struct irq_desc), and with an explicit check for NULL on
> allocation. It's also deallocated late, right before struct irq_desc
> itself is deallocated (in irq_kobj_release()).
Well spotted. That's a left over from histerical code.
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-24 20:21 ` Thomas Gleixner
@ 2026-03-24 20:32 ` Radu Rendec
2026-03-25 19:20 ` Radu Rendec
0 siblings, 1 reply; 43+ messages in thread
From: Radu Rendec @ 2026-03-24 20:32 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Florian Fainelli
On Tue, 2026-03-24 at 21:21 +0100, Thomas Gleixner wrote:
> On Tue, Mar 24 2026 at 20:54, Thomas Gleixner wrote:
>
> > On Mon, Mar 23 2026 at 15:24, Radu Rendec wrote:
> > > On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
> > > > arch/x86/include/asm/hardirq.h | 69 +++++-----
> > >
> > > I believe this breaks scripts/gdb/linux/interrupts.py, particularly the
> > > x86_show_irqstat() function, which still expects individual members in
> > > struct irq_cpustat_t.
> >
> > Uurg. I did not even know this exists. Let me try to polish the snake.
>
> Something like the below should work, now I have to figure out how to
> test it.
At a glance, it looks about right (although I haven't tested it). The
test procedure involves running the kernel in a Qemu VM and attaching
with gdb to Qemu's gdb server (apologies if I'm just stating the
obvious here). It's briefly described here:
Documentation/dev-tools/gdb-kernel-debugging.rst
The documentation doesn't elaborate on Qemu parameters. I'm using
something like this:
qemu-system-x86_64 -nographic -m 1G -accel kvm -machine q35,hpet=off -cpu host -smp 4 \
-netdev bridge,br=vbr-nat,id=net0 -device virtio-net-pci,mac=52:54:98:aa:bb:cc,netdev=net0 \
-drive file=root.img,format=raw,if=virtio \
-kernel bzImage \
-append "console=ttyS0 root=/dev/vda1 rw nokaslr" \
-s
Happy to help if something doesn't quite work for you.
> ----
> --- a/scripts/gdb/linux/interrupts.py
> +++ b/scripts/gdb/linux/interrupts.py
> @@ -97,8 +97,8 @@ irq_desc_type = utils.CachedType("struct
> text += "%*s: %10u\n" % (prec, "ERR", cnt['counter'])
> return text
>
> -def x86_show_irqstat(prec, pfx, field, desc):
> - irq_stat = gdb.parse_and_eval("&irq_stat")
> +def x86_show_irqstat(prec, pfx, idx, desc):
> + irq_stat = gdb.parse_and_eval("&irq_stat.counts[IRQ_COUNT_%s]" %idx)
> text = "%*s: " % (prec, pfx)
> for cpu in cpus.each_online_cpu():
> stat = cpus.per_cpu(irq_stat, cpu)
> @@ -118,32 +118,51 @@ irq_desc_type = utils.CachedType("struct
> text = x86_show_irqstat(prec, "NMI", '__nmi_count', 'Non-maskable interrupts')
>
> if constants.LX_CONFIG_X86_LOCAL_APIC:
> - text += x86_show_irqstat(prec, "LOC", 'apic_timer_irqs', "Local timer interrupts")
> - text += x86_show_irqstat(prec, "SPU", 'irq_spurious_count', "Spurious interrupts")
> - text += x86_show_irqstat(prec, "PMI", 'apic_perf_irqs', "Performance monitoring interrupts")
> - text += x86_show_irqstat(prec, "IWI", 'apic_irq_work_irqs', "IRQ work interrupts")
> - text += x86_show_irqstat(prec, "RTR", 'icr_read_retry_count', "APIC ICR read retries")
> + text += x86_show_irqstat(prec, "LOC", 'APIC_TIMER', "Local timer interrupts")
> + text += x86_show_irqstat(prec, "SPU", 'SPURIOUS', "Spurious interrupts")
> + text += x86_show_irqstat(prec, "PMI", 'APIC_PERF', "Performance monitoring interrupts")
> + text += x86_show_irqstat(prec, "IWI", 'IRQ_WORK', "IRQ work interrupts")
> + text += x86_show_irqstat(prec, "RTR", 'ICR_READ_RETRY', "APIC ICR read retries")
> if utils.gdb_eval_or_none("x86_platform_ipi_callback") is not None:
> - text += x86_show_irqstat(prec, "PLT", 'x86_platform_ipis', "Platform interrupts")
> + text += x86_show_irqstat(prec, "PLT", 'X86_PLATFORM_IPI', "Platform interrupts")
>
> if constants.LX_CONFIG_SMP:
> - text += x86_show_irqstat(prec, "RES", 'irq_resched_count', "Rescheduling interrupts")
> - text += x86_show_irqstat(prec, "CAL", 'irq_call_count', "Function call interrupts")
> - text += x86_show_irqstat(prec, "TLB", 'irq_tlb_count', "TLB shootdowns")
> + text += x86_show_irqstat(prec, "RES", 'RESCHEDULE', "Rescheduling interrupts")
> + text += x86_show_irqstat(prec, "CAL", 'CALL_FUNCTION', "Function call interrupts")
> +
> + text += x86_show_irqstat(prec, "TLB", 'TLB', "TLB shootdowns")
>
> if constants.LX_CONFIG_X86_THERMAL_VECTOR:
> - text += x86_show_irqstat(prec, "TRM", 'irq_thermal_count', "Thermal events interrupts")
> + text += x86_show_irqstat(prec, "TRM", 'THERMAL_APIC', "Thermal events interrupts")
>
> if constants.LX_CONFIG_X86_MCE_THRESHOLD:
> - text += x86_show_irqstat(prec, "THR", 'irq_threshold_count', "Threshold APIC interrupts")
> + text += x86_show_irqstat(prec, "THR", 'THRESHOLD_APIC', "Threshold APIC interrupts")
>
> if constants.LX_CONFIG_X86_MCE_AMD:
> - text += x86_show_irqstat(prec, "DFR", 'irq_deferred_error_count', "Deferred Error APIC interrupts")
> + text += x86_show_irqstat(prec, "DFR", 'DEFERRED_ERROR', "Deferred Error APIC interrupts")
>
> if constants.LX_CONFIG_X86_MCE:
> text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions")
> text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls")
>
> + if constants.LX_CONFIG_X86_HV_CALLBACK_VECTOR:
> + text += x86_show_irqstat(prec, 'HYP', 'HYPERVISOR_CALLBACK', 'Hypervisor callback interrupts')
> +
> + if constants.LX_CONFIG_HYPERV:
> + text += x86_show_irqstat(prec, 'HRE', 'HYPERV_REENLIGHTENMENT', 'Hyper-V reenlightenment interrupts')
> + text += x86_show_irqstat(prec, 'HVS', 'HYPERV_STIMER0', 'Hyper-V stimer0 interrupts')
> +
> + if constants.LX_CONFIG_KVM:
> + text += x86_show_irqstat(prec, "PIN", 'POSTED_INTR', 'Posted-interrupt notification event')
> + text += x86_show_irqstat(prec, "NPI", 'POSTED_INTR_NESTED', 'Nested posted-interrupt event')
> + text += x86_show_irqstat(prec, "PIW", 'POSTED_INTR_WAKEUP', 'Posted-interrupt wakeup event')
> +
> + if constants.LX_CONFIG_GUEST_PERF_EVENTS:
> + text += x86_show_irqstat(prec, "VPMI", 'PERF_GUEST_MEDIATED_PMI', 'Perf Guest Mediated PMI')
> +
> + if constants.LX_CONFIG_X86_POSTED_MSI:
> + text += x86_show_irqstat(prec, "PIN", 'POSTED_MSI_NOTIFICATION', 'Posted MSI notification event')
> +
> text += show_irq_err_count(prec)
>
> if constants.LX_CONFIG_X86_IO_APIC:
> @@ -151,11 +170,6 @@ irq_desc_type = utils.CachedType("struct
> if cnt is not None:
> text += "%*s: %10u\n" % (prec, "MIS", cnt['counter'])
>
> - if constants.LX_CONFIG_KVM:
> - text += x86_show_irqstat(prec, "PIN", 'kvm_posted_intr_ipis', 'Posted-interrupt notification event')
> - text += x86_show_irqstat(prec, "NPI", 'kvm_posted_intr_nested_ipis', 'Nested posted-interrupt event')
> - text += x86_show_irqstat(prec, "PIW", 'kvm_posted_intr_wakeup_ipis', 'Posted-interrupt wakeup event')
> -
> return text
>
> def arm_common_show_interrupts(prec):
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 06/14] genirq: Cache the condition for /proc/interrupts exposure
2026-03-24 20:31 ` Thomas Gleixner
@ 2026-03-24 20:36 ` Radu Rendec
0 siblings, 0 replies; 43+ messages in thread
From: Radu Rendec @ 2026-03-24 20:36 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On Tue, 2026-03-24 at 21:31 +0100, Thomas Gleixner wrote:
> On Mon, Mar 23 2026 at 16:58, Radu Rendec wrote:
> > > +void irq_proc_update_valid(struct irq_desc *desc)
> > > +{
> > > + u32 set = _IRQ_PROC_VALID;
> > > +
> > > + if (irq_settings_is_hidden(desc) || !desc->action ||
> > > + irq_desc_is_chained(desc) || !desc->kstat_irqs)
> >
> > Can desc->kstat_irqs ever be NULL? Looking at kernel/irq/irqdesc.c, it
> > seems to me that it's allocated always and very early (before every
> > other field in struct irq_desc), and with an explicit check for NULL on
> > allocation. It's also deallocated late, right before struct irq_desc
> > itself is deallocated (in irq_kobj_release()).
>
> Well spotted. That's a left over from histerical code.
Thanks for confirming! I was intrigued because you removed that check
in a previous patch, and I looked. And while looking, I noticed there
were quite a few other spots where the check was still done, and I
wanted to ask you anyway. I'll prepare a cleanup patch and send it.
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-24 20:32 ` Radu Rendec
@ 2026-03-25 19:20 ` Radu Rendec
2026-03-25 22:52 ` Thomas Gleixner
0 siblings, 1 reply; 43+ messages in thread
From: Radu Rendec @ 2026-03-25 19:20 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Florian Fainelli
On Tue, 2026-03-24 at 16:32 -0400, Radu Rendec wrote:
> On Tue, 2026-03-24 at 21:21 +0100, Thomas Gleixner wrote:
> > On Tue, Mar 24 2026 at 20:54, Thomas Gleixner wrote:
> >
> > > On Mon, Mar 23 2026 at 15:24, Radu Rendec wrote:
> > > > On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
> > > > > arch/x86/include/asm/hardirq.h | 69 +++++-----
> > > >
> > > > I believe this breaks scripts/gdb/linux/interrupts.py, particularly the
> > > > x86_show_irqstat() function, which still expects individual members in
> > > > struct irq_cpustat_t.
> > >
> > > Uurg. I did not even know this exists. Let me try to polish the snake.
> >
> > Something like the below should work, now I have to figure out how to
> > test it.
>
> At a glance, it looks about right (although I haven't tested it). The
> test procedure involves running the kernel in a Qemu VM and attaching
> with gdb to Qemu's gdb server (apologies if I'm just stating the
> obvious here). It's briefly described here:
> Documentation/dev-tools/gdb-kernel-debugging.rst
>
> The documentation doesn't elaborate on Qemu parameters. I'm using
> something like this:
>
> qemu-system-x86_64 -nographic -m 1G -accel kvm -machine q35,hpet=off -cpu host -smp 4 \
> -netdev bridge,br=vbr-nat,id=net0 -device virtio-net-pci,mac=52:54:98:aa:bb:cc,netdev=net0 \
> -drive file=root.img,format=raw,if=virtio \
> -kernel bzImage \
> -append "console=ttyS0 root=/dev/vda1 rw nokaslr" \
> -s
>
> Happy to help if something doesn't quite work for you.
>
> > ----
> > --- a/scripts/gdb/linux/interrupts.py
> > +++ b/scripts/gdb/linux/interrupts.py
> > @@ -97,8 +97,8 @@ irq_desc_type = utils.CachedType("struct
> > text += "%*s: %10u\n" % (prec, "ERR", cnt['counter'])
> > return text
> >
> > -def x86_show_irqstat(prec, pfx, field, desc):
> > - irq_stat = gdb.parse_and_eval("&irq_stat")
> > +def x86_show_irqstat(prec, pfx, idx, desc):
> > + irq_stat = gdb.parse_and_eval("&irq_stat.counts[IRQ_COUNT_%s]" %idx)
> > text = "%*s: " % (prec, pfx)
> > for cpu in cpus.each_online_cpu():
> > stat = cpus.per_cpu(irq_stat, cpu)
> > @@ -118,32 +118,51 @@ irq_desc_type = utils.CachedType("struct
> > text = x86_show_irqstat(prec, "NMI", '__nmi_count', 'Non-maskable interrupts')
> >
> > if constants.LX_CONFIG_X86_LOCAL_APIC:
> > - text += x86_show_irqstat(prec, "LOC", 'apic_timer_irqs', "Local timer interrupts")
> > - text += x86_show_irqstat(prec, "SPU", 'irq_spurious_count', "Spurious interrupts")
> > - text += x86_show_irqstat(prec, "PMI", 'apic_perf_irqs', "Performance monitoring interrupts")
> > - text += x86_show_irqstat(prec, "IWI", 'apic_irq_work_irqs', "IRQ work interrupts")
> > - text += x86_show_irqstat(prec, "RTR", 'icr_read_retry_count', "APIC ICR read retries")
> > + text += x86_show_irqstat(prec, "LOC", 'APIC_TIMER', "Local timer interrupts")
> > + text += x86_show_irqstat(prec, "SPU", 'SPURIOUS', "Spurious interrupts")
> > + text += x86_show_irqstat(prec, "PMI", 'APIC_PERF', "Performance monitoring interrupts")
> > + text += x86_show_irqstat(prec, "IWI", 'IRQ_WORK', "IRQ work interrupts")
> > + text += x86_show_irqstat(prec, "RTR", 'ICR_READ_RETRY', "APIC ICR read retries")
> > if utils.gdb_eval_or_none("x86_platform_ipi_callback") is not None:
> > - text += x86_show_irqstat(prec, "PLT", 'x86_platform_ipis', "Platform interrupts")
> > + text += x86_show_irqstat(prec, "PLT", 'X86_PLATFORM_IPI', "Platform interrupts")
> >
> > if constants.LX_CONFIG_SMP:
> > - text += x86_show_irqstat(prec, "RES", 'irq_resched_count', "Rescheduling interrupts")
> > - text += x86_show_irqstat(prec, "CAL", 'irq_call_count', "Function call interrupts")
> > - text += x86_show_irqstat(prec, "TLB", 'irq_tlb_count', "TLB shootdowns")
> > + text += x86_show_irqstat(prec, "RES", 'RESCHEDULE', "Rescheduling interrupts")
> > + text += x86_show_irqstat(prec, "CAL", 'CALL_FUNCTION', "Function call interrupts")
> > +
> > + text += x86_show_irqstat(prec, "TLB", 'TLB', "TLB shootdowns")
> >
> > if constants.LX_CONFIG_X86_THERMAL_VECTOR:
> > - text += x86_show_irqstat(prec, "TRM", 'irq_thermal_count', "Thermal events interrupts")
> > + text += x86_show_irqstat(prec, "TRM", 'THERMAL_APIC', "Thermal events interrupts")
> >
> > if constants.LX_CONFIG_X86_MCE_THRESHOLD:
> > - text += x86_show_irqstat(prec, "THR", 'irq_threshold_count', "Threshold APIC interrupts")
> > + text += x86_show_irqstat(prec, "THR", 'THRESHOLD_APIC', "Threshold APIC interrupts")
> >
> > if constants.LX_CONFIG_X86_MCE_AMD:
> > - text += x86_show_irqstat(prec, "DFR", 'irq_deferred_error_count', "Deferred Error APIC interrupts")
> > + text += x86_show_irqstat(prec, "DFR", 'DEFERRED_ERROR', "Deferred Error APIC interrupts")
> >
> > if constants.LX_CONFIG_X86_MCE:
> > text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions")
> > text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls")
> >
> > + if constants.LX_CONFIG_X86_HV_CALLBACK_VECTOR:
> > + text += x86_show_irqstat(prec, 'HYP', 'HYPERVISOR_CALLBACK', 'Hypervisor callback interrupts')
> > +
> > + if constants.LX_CONFIG_HYPERV:
> > + text += x86_show_irqstat(prec, 'HRE', 'HYPERV_REENLIGHTENMENT', 'Hyper-V reenlightenment interrupts')
> > + text += x86_show_irqstat(prec, 'HVS', 'HYPERV_STIMER0', 'Hyper-V stimer0 interrupts')
> > +
> > + if constants.LX_CONFIG_KVM:
> > + text += x86_show_irqstat(prec, "PIN", 'POSTED_INTR', 'Posted-interrupt notification event')
> > + text += x86_show_irqstat(prec, "NPI", 'POSTED_INTR_NESTED', 'Nested posted-interrupt event')
> > + text += x86_show_irqstat(prec, "PIW", 'POSTED_INTR_WAKEUP', 'Posted-interrupt wakeup event')
> > +
> > + if constants.LX_CONFIG_GUEST_PERF_EVENTS:
> > + text += x86_show_irqstat(prec, "VPMI", 'PERF_GUEST_MEDIATED_PMI', 'Perf Guest Mediated PMI')
> > +
> > + if constants.LX_CONFIG_X86_POSTED_MSI:
> > + text += x86_show_irqstat(prec, "PIN", 'POSTED_MSI_NOTIFICATION', 'Posted MSI notification event')
> > +
> > text += show_irq_err_count(prec)
> >
> > if constants.LX_CONFIG_X86_IO_APIC:
> > @@ -151,11 +170,6 @@ irq_desc_type = utils.CachedType("struct
> > if cnt is not None:
> > text += "%*s: %10u\n" % (prec, "MIS", cnt['counter'])
> >
> > - if constants.LX_CONFIG_KVM:
> > - text += x86_show_irqstat(prec, "PIN", 'kvm_posted_intr_ipis', 'Posted-interrupt notification event')
> > - text += x86_show_irqstat(prec, "NPI", 'kvm_posted_intr_nested_ipis', 'Nested posted-interrupt event')
> > - text += x86_show_irqstat(prec, "PIW", 'kvm_posted_intr_wakeup_ipis', 'Posted-interrupt wakeup event')
> > -
> > return text
> >
> > def arm_common_show_interrupts(prec):
I tested it, and it's very close but requires a few extra changes,
which I included as a patch at the bottom.
There's one more gotcha: in the next patch (patch 5) you rename nr_irqs
to total_nr_irqs, and the change must be applied to interrupts.py as
well. It's used at the beginning of the LxInterruptList.invoke()
function.
One other issue I came across is that the fields below are not defined
for me (even though the corresponding config options do exist in the
kernel config file):
constants.LX_CONFIG_X86_HV_CALLBACK_VECTOR
constants.LX_CONFIG_HYPERV
constants.LX_CONFIG_GUEST_PERF_EVENTS
constants.LX_CONFIG_X86_POSTED_MSI
I didn't spend the time to figure out why. I'm pretty sure this is
completely unrelated to your changes.
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -102,20 +102,12 @@ def x86_show_irqstat(prec, pfx, idx, desc):
text = "%*s: " % (prec, pfx)
for cpu in cpus.each_online_cpu():
stat = cpus.per_cpu(irq_stat, cpu)
- text += "%10u " % (stat[field])
- text += " %s\n" % (desc)
- return text
-
-def x86_show_mce(prec, var, pfx, desc):
- pvar = gdb.parse_and_eval(var)
- text = "%*s: " % (prec, pfx)
- for cpu in cpus.each_online_cpu():
- text += "%10u " % (cpus.per_cpu(pvar, cpu).dereference())
+ text += "%10u " % (stat.dereference())
text += " %s\n" % (desc)
return text
def x86_show_interupts(prec):
- text = x86_show_irqstat(prec, "NMI", '__nmi_count', 'Non-maskable interrupts')
+ text = x86_show_irqstat(prec, "NMI", 'NMI', 'Non-maskable interrupts')
if constants.LX_CONFIG_X86_LOCAL_APIC:
text += x86_show_irqstat(prec, "LOC", 'APIC_TIMER', "Local timer interrupts")
@@ -142,8 +134,8 @@ def x86_show_interupts(prec):
text += x86_show_irqstat(prec, "DFR", 'DEFERRED_ERROR', "Deferred Error APIC interrupts")
if constants.LX_CONFIG_X86_MCE:
- text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions")
- text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls")
+ text += x86_show_irqstat(prec, "MCE", "MCE_EXCEPTION", "Machine check exceptions")
+ text += x86_show_irqstat(prec, "MCP", "MCE_POLL", "Machine check polls")
if constants.LX_CONFIG_X86_HV_CALLBACK_VECTOR:
text += x86_show_irqstat(prec, 'HYP', 'HYPERVISOR_CALLBACK', 'Hypervisor callback interrupts')
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 07/14] genirq: Calculate precision only when required
2026-03-20 13:21 ` [patch v2 07/14] genirq: Calculate precision only when required Thomas Gleixner
@ 2026-03-25 19:47 ` Radu Rendec
0 siblings, 0 replies; 43+ messages in thread
From: Radu Rendec @ 2026-03-25 19:47 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On Fri, 2026-03-20 at 14:21 +0100, Thomas Gleixner wrote:
> Calculating the precision of the interrupt number column on every initial
> show_interrupt() invocation is a pointless exercise as the underlying
> maximum number of interrupts rarely changes.
>
> Calculate it only when that number is modified and let show_interrupts()
> use the cached value.
>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
> ---
> kernel/irq/internals.h | 6 ++++++
> kernel/irq/irqdesc.c | 10 ++++++----
> kernel/irq/proc.c | 28 +++++++++++++++++++---------
> 3 files changed, 31 insertions(+), 13 deletions(-)
>
> --- a/kernel/irq/internals.h
> +++ b/kernel/irq/internals.h
> @@ -134,6 +134,12 @@ static inline void unregister_handler_pr
> static inline void irq_proc_update_valid(struct irq_desc *desc) { }
> #endif
>
> +#if defined(CONFIG_PROC_FS) && defined(CONFIG_GENERIC_IRQ_SHOW)
> +void irq_proc_calc_prec(void);
> +#else
> +static inline void irq_proc_calc_prec(void) { }
> +#endif
> +
> extern bool irq_can_set_affinity_usr(unsigned int irq);
>
> extern int irq_do_set_affinity(struct irq_data *data,
> --- a/kernel/irq/irqdesc.c
> +++ b/kernel/irq/irqdesc.c
> @@ -157,13 +157,12 @@ EXPORT_SYMBOL_GPL(irq_get_nr_irqs);
> *
> * Return: @nr.
> */
> -unsigned int irq_set_nr_irqs(unsigned int nr)
> +unsigned int __init irq_set_nr_irqs(unsigned int nr)
> {
> total_nr_irqs = nr;
> -
> + irq_proc_calc_prec();
> return nr;
> }
> -EXPORT_SYMBOL_GPL(irq_set_nr_irqs);
>
> static DEFINE_MUTEX(sparse_irq_lock);
> static struct maple_tree sparse_irqs = MTREE_INIT_EXT(sparse_irqs,
> @@ -544,6 +543,7 @@ static bool irq_expand_nr_irqs(unsigned
> if (nr > MAX_SPARSE_IRQS)
> return false;
> total_nr_irqs = nr;
> + irq_proc_calc_prec();
> return true;
> }
>
> @@ -572,6 +572,7 @@ int __init early_irq_init(void)
> desc = alloc_desc(i, node, 0, NULL, NULL);
> irq_insert_desc(i, desc);
> }
> + irq_proc_calc_prec();
> return arch_early_irq_init();
> }
>
> @@ -592,7 +593,7 @@ int __init early_irq_init(void)
>
> init_irq_default_affinity();
>
> - printk(KERN_INFO "NR_IRQS: %d\n", NR_IRQS);
> + pr_info("NR_IRQS: %d\n", NR_IRQS);
>
> count = ARRAY_SIZE(irq_desc);
>
> @@ -602,6 +603,7 @@ int __init early_irq_init(void)
> goto __free_desc_res;
> }
>
> + irq_proc_calc_prec();
> return arch_early_irq_init();
>
> __free_desc_res:
> --- a/kernel/irq/proc.c
> +++ b/kernel/irq/proc.c
> @@ -457,10 +457,21 @@ int __weak arch_show_interrupts(struct s
> return 0;
> }
>
> +static int irq_num_prec __read_mostly = 3;
> +
> #ifndef ACTUAL_NR_IRQS
> # define ACTUAL_NR_IRQS total_nr_irqs
> #endif
>
> +void irq_proc_calc_prec(void)
> +{
> + unsigned int prec, n;
> +
> + for (prec = 3, n = 1000; prec < 10 && n <= total_nr_irqs; ++prec)
> + n *= 10;
> + WRITE_ONCE(irq_num_prec, prec);
> +}
> +
> #define ZSTR1 " 0"
> #define ZSTR1_LEN (sizeof(ZSTR1) - 1)
> #define ZSTR16 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 ZSTR1 \
> @@ -499,8 +510,7 @@ void irq_proc_emit_counts(struct seq_fil
>
> int show_interrupts(struct seq_file *p, void *v)
> {
> - const unsigned int nr_irqs = irq_get_nr_irqs();
> - static int prec;
> + int prec = READ_ONCE(irq_num_prec);
>
> int i = *(loff_t *) v, j;
> struct irqaction *action;
> @@ -514,9 +524,6 @@ int show_interrupts(struct seq_file *p,
>
> /* print header and calculate the width of the first column */
> if (i == 0) {
> - for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
> - j *= 10;
> -
> seq_printf(p, "%*s", prec + 8, "");
> for_each_online_cpu(j)
> seq_printf(p, "CPU%-8d", j);
> @@ -552,13 +559,16 @@ int show_interrupts(struct seq_file *p,
> } else {
> seq_printf(p, "%8s", "None");
> }
> +
> + seq_putc(p, ' ');
> if (desc->irq_data.domain)
> - seq_printf(p, " %*lu", prec, desc->irq_data.hwirq);
> + seq_put_decimal_ull_width(p, "", desc->irq_data.hwirq, prec);
> else
> seq_printf(p, " %*s", prec, "");
> -#ifdef CONFIG_GENERIC_IRQ_SHOW_LEVEL
> - seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
> -#endif
> +
> + if (IS_ENABLED(CONFIG_GENERIC_IRQ_SHOW_LEVEL))
> + seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge");
> +
> if (desc->name)
> seq_printf(p, "-%-8s", desc->name);
>
Reviewed-by: Radu Rendec <radu@rendec.net>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-25 19:20 ` Radu Rendec
@ 2026-03-25 22:52 ` Thomas Gleixner
2026-03-25 22:54 ` Florian Fainelli
2026-03-26 12:34 ` Radu Rendec
0 siblings, 2 replies; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-25 22:52 UTC (permalink / raw)
To: Radu Rendec, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Florian Fainelli
On Wed, Mar 25 2026 at 15:20, Radu Rendec wrote:
> On Tue, 2026-03-24 at 16:32 -0400, Radu Rendec wrote:
>
> I tested it, and it's very close but requires a few extra changes,
> which I included as a patch at the bottom.
All of this is overly complicated. That can directly use the
irq_stat_info array and therefore stays always in sync without ever
touching that python nightmare again.
Replacement patch below (works only after 5/14 obviously).
> There's one more gotcha: in the next patch (patch 5) you rename nr_irqs
> to total_nr_irqs, and the change must be applied to interrupts.py as
> well. It's used at the beginning of the LxInterruptList.invoke()
> function.
Sigh, yes. This GDB python insanity is a pain, debugging it even more so.
Thanks,
tglx
---
diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py
index f4f715a8f0e3..7fde52317e84 100644
--- a/scripts/gdb/linux/interrupts.py
+++ b/scripts/gdb/linux/interrupts.py
@@ -97,13 +97,13 @@ def show_irq_err_count(prec):
text += "%*s: %10u\n" % (prec, "ERR", cnt['counter'])
return text
-def x86_show_irqstat(prec, pfx, field, desc):
- irq_stat = gdb.parse_and_eval("&irq_stat")
+def x86_show_irqstat(prec, pfx, idx, desc):
+ irq_stat = gdb.parse_and_eval("&irq_stat.counts[%d]" %idx)
text = "%*s: " % (prec, pfx)
for cpu in cpus.each_online_cpu():
stat = cpus.per_cpu(irq_stat, cpu)
- text += "%10u " % (stat[field])
- text += " %s\n" % (desc)
+ text += "%10u " % (stat.dereference())
+ text += desc
return text
def x86_show_mce(prec, var, pfx, desc):
@@ -115,34 +115,14 @@ def x86_show_mce(prec, var, pfx, desc):
return text
def x86_show_interupts(prec):
- text = x86_show_irqstat(prec, "NMI", '__nmi_count', 'Non-maskable interrupts')
-
- if constants.LX_CONFIG_X86_LOCAL_APIC:
- text += x86_show_irqstat(prec, "LOC", 'apic_timer_irqs', "Local timer interrupts")
- text += x86_show_irqstat(prec, "SPU", 'irq_spurious_count', "Spurious interrupts")
- text += x86_show_irqstat(prec, "PMI", 'apic_perf_irqs', "Performance monitoring interrupts")
- text += x86_show_irqstat(prec, "IWI", 'apic_irq_work_irqs', "IRQ work interrupts")
- text += x86_show_irqstat(prec, "RTR", 'icr_read_retry_count', "APIC ICR read retries")
- if utils.gdb_eval_or_none("x86_platform_ipi_callback") is not None:
- text += x86_show_irqstat(prec, "PLT", 'x86_platform_ipis', "Platform interrupts")
-
- if constants.LX_CONFIG_SMP:
- text += x86_show_irqstat(prec, "RES", 'irq_resched_count', "Rescheduling interrupts")
- text += x86_show_irqstat(prec, "CAL", 'irq_call_count', "Function call interrupts")
- text += x86_show_irqstat(prec, "TLB", 'irq_tlb_count', "TLB shootdowns")
-
- if constants.LX_CONFIG_X86_THERMAL_VECTOR:
- text += x86_show_irqstat(prec, "TRM", 'irq_thermal_count', "Thermal events interrupts")
+ info_type = gdb.lookup_type('struct irq_stat_info')
+ info = gdb.parse_and_eval('irq_stat_info')
- if constants.LX_CONFIG_X86_MCE_THRESHOLD:
- text += x86_show_irqstat(prec, "THR", 'irq_threshold_count', "Threshold APIC interrupts")
-
- if constants.LX_CONFIG_X86_MCE_AMD:
- text += x86_show_irqstat(prec, "DFR", 'irq_deferred_error_count', "Deferred Error APIC interrupts")
-
- if constants.LX_CONFIG_X86_MCE:
- text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions")
- text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls")
+ text = ""
+ for idx in range(int(info.type.sizeof / info_type.sizeof)):
+ pfx = info[idx]['symbol'].string()
+ desc = info[idx]['text'].string()
+ text += x86_show_irqstat(prec, pfx, idx, desc)
text += show_irq_err_count(prec)
@@ -151,11 +131,6 @@ def x86_show_interupts(prec):
if cnt is not None:
text += "%*s: %10u\n" % (prec, "MIS", cnt['counter'])
- if constants.LX_CONFIG_KVM:
- text += x86_show_irqstat(prec, "PIN", 'kvm_posted_intr_ipis', 'Posted-interrupt notification event')
- text += x86_show_irqstat(prec, "NPI", 'kvm_posted_intr_nested_ipis', 'Nested posted-interrupt event')
- text += x86_show_irqstat(prec, "PIW", 'kvm_posted_intr_wakeup_ipis', 'Posted-interrupt wakeup event')
-
return text
def arm_common_show_interrupts(prec):
@@ -209,7 +184,7 @@ class LxInterruptList(gdb.Command):
super(LxInterruptList, self).__init__("lx-interruptlist", gdb.COMMAND_DATA)
def invoke(self, arg, from_tty):
- nr_irqs = gdb.parse_and_eval("nr_irqs")
+ nr_irqs = gdb.parse_and_eval("total_nr_irqs")
prec = 3
j = 1000
while prec < 10 and j <= nr_irqs:
^ permalink raw reply related [flat|nested] 43+ messages in thread
* Re: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-25 22:52 ` Thomas Gleixner
@ 2026-03-25 22:54 ` Florian Fainelli
2026-03-26 10:29 ` Thomas Gleixner
2026-03-26 12:34 ` Radu Rendec
1 sibling, 1 reply; 43+ messages in thread
From: Florian Fainelli @ 2026-03-25 22:54 UTC (permalink / raw)
To: Thomas Gleixner, Radu Rendec, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On 3/25/26 15:52, Thomas Gleixner wrote:
> On Wed, Mar 25 2026 at 15:20, Radu Rendec wrote:
>> On Tue, 2026-03-24 at 16:32 -0400, Radu Rendec wrote:
>>
>> I tested it, and it's very close but requires a few extra changes,
>> which I included as a patch at the bottom.
>
> All of this is overly complicated. That can directly use the
> irq_stat_info array and therefore stays always in sync without ever
> touching that python nightmare again.
>
> Replacement patch below (works only after 5/14 obviously).
>
>> There's one more gotcha: in the next patch (patch 5) you rename nr_irqs
>> to total_nr_irqs, and the change must be applied to interrupts.py as
>> well. It's used at the beginning of the LxInterruptList.invoke()
>> function.
>
> Sigh, yes. This GDB python insanity is a pain, debugging it even more so.
All matter of perspective but kudos to you for trying to fix it, that is
much appreciated.
--
Florian
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-25 22:54 ` Florian Fainelli
@ 2026-03-26 10:29 ` Thomas Gleixner
2026-03-26 23:00 ` Florian Fainelli
0 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-26 10:29 UTC (permalink / raw)
To: Florian Fainelli, Radu Rendec, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
Florian!
On Wed, Mar 25 2026 at 15:54, Florian Fainelli wrote:
> On 3/25/26 15:52, Thomas Gleixner wrote:
>> Sigh, yes. This GDB python insanity is a pain, debugging it even more so.
>
> All matter of perspective but kudos to you for trying to fix it, that is
> much appreciated.
What bothers me most on the kernel side is that there is neither an
annotation which tells that something is used in a gdb script, nor a
compile time check.
So if Radu hadn't noticed and pointed it out, I wouldn't have known
about it at all.
The insanity snark was just my annoyance about the python integration
into gdb. It's amazing that a debugger provides an extension interface
which is undebuggable. But admittedly it's at least consistent with the
rest of GDB's usability.
I know why I prefer the DBT (Debug By Thinking) debugger. :)
Thanks,
tglx
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-25 22:52 ` Thomas Gleixner
2026-03-25 22:54 ` Florian Fainelli
@ 2026-03-26 12:34 ` Radu Rendec
1 sibling, 0 replies; 43+ messages in thread
From: Radu Rendec @ 2026-03-26 12:34 UTC (permalink / raw)
To: Thomas Gleixner, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman, Florian Fainelli
On Wed, 2026-03-25 at 23:52 +0100, Thomas Gleixner wrote:
> On Wed, Mar 25 2026 at 15:20, Radu Rendec wrote:
> > On Tue, 2026-03-24 at 16:32 -0400, Radu Rendec wrote:
> >
> > I tested it, and it's very close but requires a few extra changes,
> > which I included as a patch at the bottom.
>
> All of this is overly complicated. That can directly use the
> irq_stat_info array and therefore stays always in sync without ever
> touching that python nightmare again.
Of course! That's why you created irq_stat_info in the first place, and
the gdb/python approach is now similar to what you did on the kernel
side. I guess the solution was too obvious for either of us to see on
the first iteration :)
> Replacement patch below (works only after 5/14 obviously).
I tested it, and I can confirm it works.
> > There's one more gotcha: in the next patch (patch 5) you rename nr_irqs
> > to total_nr_irqs, and the change must be applied to interrupts.py as
> > well. It's used at the beginning of the LxInterruptList.invoke()
> > function.
>
> Sigh, yes. This GDB python insanity is a pain, debugging it even more so.
I agree, debugging it is not very friendly. But in case you ever have
to deal with it again, you may find this helpful:
* print() in python code works (you'll see the output in gdb);
* there's a "set python print-stack full" command; once enabled,
you'll see something like this:
(gdb) lx-interruptlist
Traceback (most recent call last):
File "/..../scripts/gdb/linux/interrupts.py", line 187, in invoke
nr_irqs = gdb.parse_and_eval("total_nr_irqs1")
gdb.error: No symbol "total_nr_irqs1" in current context.
> ---
> diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py
> index f4f715a8f0e3..7fde52317e84 100644
> --- a/scripts/gdb/linux/interrupts.py
> +++ b/scripts/gdb/linux/interrupts.py
> @@ -97,13 +97,13 @@ def show_irq_err_count(prec):
> text += "%*s: %10u\n" % (prec, "ERR", cnt['counter'])
> return text
>
> -def x86_show_irqstat(prec, pfx, field, desc):
> - irq_stat = gdb.parse_and_eval("&irq_stat")
> +def x86_show_irqstat(prec, pfx, idx, desc):
> + irq_stat = gdb.parse_and_eval("&irq_stat.counts[%d]" %idx)
> text = "%*s: " % (prec, pfx)
> for cpu in cpus.each_online_cpu():
> stat = cpus.per_cpu(irq_stat, cpu)
> - text += "%10u " % (stat[field])
> - text += " %s\n" % (desc)
> + text += "%10u " % (stat.dereference())
> + text += desc
> return text
>
> def x86_show_mce(prec, var, pfx, desc):
> @@ -115,34 +115,14 @@ def x86_show_mce(prec, var, pfx, desc):
> return text
>
> def x86_show_interupts(prec):
> - text = x86_show_irqstat(prec, "NMI", '__nmi_count', 'Non-maskable interrupts')
> -
> - if constants.LX_CONFIG_X86_LOCAL_APIC:
> - text += x86_show_irqstat(prec, "LOC", 'apic_timer_irqs', "Local timer interrupts")
> - text += x86_show_irqstat(prec, "SPU", 'irq_spurious_count', "Spurious interrupts")
> - text += x86_show_irqstat(prec, "PMI", 'apic_perf_irqs', "Performance monitoring interrupts")
> - text += x86_show_irqstat(prec, "IWI", 'apic_irq_work_irqs', "IRQ work interrupts")
> - text += x86_show_irqstat(prec, "RTR", 'icr_read_retry_count', "APIC ICR read retries")
> - if utils.gdb_eval_or_none("x86_platform_ipi_callback") is not None:
> - text += x86_show_irqstat(prec, "PLT", 'x86_platform_ipis', "Platform interrupts")
> -
> - if constants.LX_CONFIG_SMP:
> - text += x86_show_irqstat(prec, "RES", 'irq_resched_count', "Rescheduling interrupts")
> - text += x86_show_irqstat(prec, "CAL", 'irq_call_count', "Function call interrupts")
> - text += x86_show_irqstat(prec, "TLB", 'irq_tlb_count', "TLB shootdowns")
> -
> - if constants.LX_CONFIG_X86_THERMAL_VECTOR:
> - text += x86_show_irqstat(prec, "TRM", 'irq_thermal_count', "Thermal events interrupts")
> + info_type = gdb.lookup_type('struct irq_stat_info')
> + info = gdb.parse_and_eval('irq_stat_info')
>
> - if constants.LX_CONFIG_X86_MCE_THRESHOLD:
> - text += x86_show_irqstat(prec, "THR", 'irq_threshold_count', "Threshold APIC interrupts")
> -
> - if constants.LX_CONFIG_X86_MCE_AMD:
> - text += x86_show_irqstat(prec, "DFR", 'irq_deferred_error_count', "Deferred Error APIC interrupts")
> -
> - if constants.LX_CONFIG_X86_MCE:
> - text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions")
> - text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls")
> + text = ""
> + for idx in range(int(info.type.sizeof / info_type.sizeof)):
> + pfx = info[idx]['symbol'].string()
> + desc = info[idx]['text'].string()
> + text += x86_show_irqstat(prec, pfx, idx, desc)
>
> text += show_irq_err_count(prec)
>
> @@ -151,11 +131,6 @@ def x86_show_interupts(prec):
> if cnt is not None:
> text += "%*s: %10u\n" % (prec, "MIS", cnt['counter'])
>
> - if constants.LX_CONFIG_KVM:
> - text += x86_show_irqstat(prec, "PIN", 'kvm_posted_intr_ipis', 'Posted-interrupt notification event')
> - text += x86_show_irqstat(prec, "NPI", 'kvm_posted_intr_nested_ipis', 'Nested posted-interrupt event')
> - text += x86_show_irqstat(prec, "PIW", 'kvm_posted_intr_wakeup_ipis', 'Posted-interrupt wakeup event')
> -
> return text
>
> def arm_common_show_interrupts(prec):
> @@ -209,7 +184,7 @@ class LxInterruptList(gdb.Command):
> super(LxInterruptList, self).__init__("lx-interruptlist", gdb.COMMAND_DATA)
>
> def invoke(self, arg, from_tty):
> - nr_irqs = gdb.parse_and_eval("nr_irqs")
> + nr_irqs = gdb.parse_and_eval("total_nr_irqs")
> prec = 3
> j = 1000
> while prec < 10 and j <= nr_irqs:
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 08/14] genirq: Add rcuref count to struct irq_desc
2026-03-20 13:22 ` [patch v2 08/14] genirq: Add rcuref count to struct irq_desc Thomas Gleixner
@ 2026-03-26 18:43 ` Dmitry Ilvokhin
0 siblings, 0 replies; 43+ messages in thread
From: Dmitry Ilvokhin @ 2026-03-26 18:43 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: LKML, x86, Neil Horman, Radu Rendec
On Fri, Mar 20, 2026 at 02:22:04PM +0100, Thomas Gleixner wrote:
> Prepare for a smarter iterator for /proc/interrupts so that the next
> interrupt descriptor can be cached after lookup.
>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Reviewed-by: Dmitry Ilvokhin <d@ilvokhin.com>
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 09/14] genirq: Expose irq_find_desc_at_or_after() in core code
2026-03-20 13:22 ` [patch v2 09/14] genirq: Expose irq_find_desc_at_or_after() in core code Thomas Gleixner
@ 2026-03-26 19:13 ` Dmitry Ilvokhin
2026-03-26 21:11 ` Thomas Gleixner
0 siblings, 1 reply; 43+ messages in thread
From: Dmitry Ilvokhin @ 2026-03-26 19:13 UTC (permalink / raw)
To: Thomas Gleixner; +Cc: LKML, x86, Neil Horman, Radu Rendec
On Fri, Mar 20, 2026 at 02:22:09PM +0100, Thomas Gleixner wrote:
> --- in preparation for a smarter iterator for /proc/interrupts.
>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> ---
> kernel/irq/internals.h | 2 ++
> kernel/irq/irqdesc.c | 12 +++++-------
> 2 files changed, 7 insertions(+), 7 deletions(-)
>
[...]
>
> -static unsigned int irq_find_at_or_after(unsigned int offset)
> +struct irq_desc *irq_find_desc_at_or_after(unsigned int offset)
> {
> unsigned long index = offset;
> - struct irq_desc *desc;
> -
> - guard(rcu)();
Is the drop of guard(rcu)() intentional here? irq_get_next_irq()
dereferences the returned pointer after mt_find() has dropped
rcu_read_unlock(). Should it hold its own guard(rcu)() to keep the same
behaviour as before?
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 09/14] genirq: Expose irq_find_desc_at_or_after() in core code
2026-03-26 19:13 ` Dmitry Ilvokhin
@ 2026-03-26 21:11 ` Thomas Gleixner
2026-03-26 21:25 ` Thomas Gleixner
0 siblings, 1 reply; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-26 21:11 UTC (permalink / raw)
To: Dmitry Ilvokhin; +Cc: LKML, x86, Neil Horman, Radu Rendec
On Thu, Mar 26 2026 at 19:13, Dmitry Ilvokhin wrote:
>> -static unsigned int irq_find_at_or_after(unsigned int offset)
>> +struct irq_desc *irq_find_desc_at_or_after(unsigned int offset)
>> {
>> unsigned long index = offset;
>> - struct irq_desc *desc;
>> -
>> - guard(rcu)();
>
> Is the drop of guard(rcu)() intentional here? irq_get_next_irq()
> dereferences the returned pointer after mt_find() has dropped
> rcu_read_unlock(). Should it hold its own guard(rcu)() to keep the same
> behaviour as before?
Good catch. The caller of irq_find_at_or_after() needs to hold it.
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 09/14] genirq: Expose irq_find_desc_at_or_after() in core code
2026-03-26 21:11 ` Thomas Gleixner
@ 2026-03-26 21:25 ` Thomas Gleixner
0 siblings, 0 replies; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-26 21:25 UTC (permalink / raw)
To: Dmitry Ilvokhin; +Cc: LKML, x86, Neil Horman, Radu Rendec
On Thu, Mar 26 2026 at 22:11, Thomas Gleixner wrote:
> On Thu, Mar 26 2026 at 19:13, Dmitry Ilvokhin wrote:
>>> -static unsigned int irq_find_at_or_after(unsigned int offset)
>>> +struct irq_desc *irq_find_desc_at_or_after(unsigned int offset)
>>> {
>>> unsigned long index = offset;
>>> - struct irq_desc *desc;
>>> -
>>> - guard(rcu)();
>>
>> Is the drop of guard(rcu)() intentional here? irq_get_next_irq()
>> dereferences the returned pointer after mt_find() has dropped
>> rcu_read_unlock(). Should it hold its own guard(rcu)() to keep the same
>> behaviour as before?
>
> Good catch. The caller of irq_find_at_or_after() needs to hold it.
Just double checked. I have lockdep enabled, but it can't catch it the
way the code is written. The new iterator code holds it, otherwise
rcuref_get() would have triggered it.
I've added it back and added a lockdep assert into
irq_find_at_or_after().
You just caught it before I was preparing the V3 post. Let me re-run the
test before I do so.
Thanks,
tglx
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-26 10:29 ` Thomas Gleixner
@ 2026-03-26 23:00 ` Florian Fainelli
2026-03-27 11:03 ` Thomas Gleixner
0 siblings, 1 reply; 43+ messages in thread
From: Florian Fainelli @ 2026-03-26 23:00 UTC (permalink / raw)
To: Thomas Gleixner, Radu Rendec, LKML; +Cc: x86, Dmitry Ilvokhin, Neil Horman
On 3/26/26 03:29, Thomas Gleixner wrote:
> Florian!
>
> On Wed, Mar 25 2026 at 15:54, Florian Fainelli wrote:
>> On 3/25/26 15:52, Thomas Gleixner wrote:
>>> Sigh, yes. This GDB python insanity is a pain, debugging it even more so.
>>
>> All matter of perspective but kudos to you for trying to fix it, that is
>> much appreciated.
>
> What bothers me most on the kernel side is that there is neither
> annotation, which tells something is used in a gdb script, nor a compile
> time check.
>
> So if Radu hadn't noticed and pointed it out, I wouldn't have known
> about it at all.
Yes that's a fair point and it has been a complaint of mine whenever I
had to use the GDB scripts because they get out of sync so easily with
the code they are trying to parse, at some point it's not even fun.
This should still apply if you want to take it:
https://lore.kernel.org/all/20250625231053.1134589-6-florian.fainelli@broadcom.com/
As far as ensuring that the C/Rust code stays in sync with GDB at
compile time, did not have any bright idea about how to do that.
>
> The insanity snark was just my annoyance about the python integration
> into gdb. It's amazing that a debugger provides an extension interface
> which is undebuggable. But admittedly it's at least consistent with the
> rest of GDB's usability.
Oh yes, that's something that was learned the hard way on my side too,
not sure why that's not the default, much hair pulling resulted from
having incomplete backtraces.
--
Florian
^ permalink raw reply [flat|nested] 43+ messages in thread
* Re: [patch v2 04/14] x86/irq: Make irqstats array based
2026-03-26 23:00 ` Florian Fainelli
@ 2026-03-27 11:03 ` Thomas Gleixner
0 siblings, 0 replies; 43+ messages in thread
From: Thomas Gleixner @ 2026-03-27 11:03 UTC (permalink / raw)
To: Florian Fainelli, Radu Rendec, LKML
Cc: x86, Dmitry Ilvokhin, Jan Kiszka, Kieran Bingham
Florian!
On Thu, Mar 26 2026 at 16:00, Florian Fainelli wrote:
> On 3/26/26 03:29, Thomas Gleixner wrote:
>> So if Radu hadn't noticed and pointed it out, I wouldn't have known
>> about it at all.
>
> Yes that's a fair point and it has been a complaint of mine whenever I
> had to use the GDB scripts because they get out of sync so easily with
> the code they are trying to parse, at some point it's not even fun.
>
> This should still apply if you want to take it:
>
> https://lore.kernel.org/all/20250625231053.1134589-6-florian.fainelli@broadcom.com/
That's just delegating something unenforceable and not automatically
testable to other maintainers. So it's not solving anything.
You want something which breaks the build for the submitter so that he
needs to go and fix up the gdb script or at least ask for help if he
can't figure it out on his own. If he does not build with debug enabled
the CI robots will catch it and send him back to the drawing board.
> As far as ensuring that the C/Rust code stays in sync with GDB at
> compile time, did not have any bright idea about how to do that.
Add parseable annotation into each of the snake scripts:
class kernel_dep(object):
    def __init__(self, name, member=None, config=None, type=TYPE_VAR):
self.name = name
self.member = member
self.config = config
self.type = type
and per unit you add:
kernel_dependencies = [
kernel_dep(TYPE_VAR, 'nr_irqs'),
kernel_dep(TYPE_VAR, 'irq_stat', '__nmi_count', 'CONFIG_X86'),
kernel_dep('irq_chip', 'name', type=TYPE_STRUCT),
....
]
or something daft like that. I'm 100% sure that real snake experts will
come up with a way more elegant solution, but you get the idea.
Then have a script which collects this information from the source files
and validates it against the debug information in vmlinux. Obviously
this would allow you to validate the snake code against those
dependencies as well.
The first two entries would have caught both changes I made, the array
conversion of irq_stat and the rename of nr_irqs.
Hmm?
Thanks,
tglx
^ permalink raw reply [flat|nested] 43+ messages in thread
end of thread, other threads:[~2026-03-27 11:03 UTC | newest]
Thread overview: 43+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-03-20 13:21 [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Thomas Gleixner
2026-03-20 13:21 ` [patch v2 01/14] x86/irq: Optimize interrupts decimals printing Thomas Gleixner
2026-03-21 16:10 ` Radu Rendec
2026-03-20 13:21 ` [patch v2 02/14] genirq/proc: Avoid formatting zero counts in /proc/interrupts Thomas Gleixner
2026-03-21 16:38 ` Radu Rendec
2026-03-20 13:21 ` [patch v2 03/14] genirq/proc: Utilize irq_desc::tot_count to avoid evaluation Thomas Gleixner
2026-03-22 19:59 ` Radu Rendec
2026-03-20 13:21 ` [patch v2 04/14] x86/irq: Make irqstats array based Thomas Gleixner
2026-03-20 16:39 ` Michael Kelley
2026-03-21 16:38 ` Thomas Gleixner
2026-03-21 20:32 ` Michael Kelley
2026-03-23 19:24 ` Radu Rendec
2026-03-24 19:54 ` Thomas Gleixner
2026-03-24 20:21 ` Thomas Gleixner
2026-03-24 20:32 ` Radu Rendec
2026-03-25 19:20 ` Radu Rendec
2026-03-25 22:52 ` Thomas Gleixner
2026-03-25 22:54 ` Florian Fainelli
2026-03-26 10:29 ` Thomas Gleixner
2026-03-26 23:00 ` Florian Fainelli
2026-03-27 11:03 ` Thomas Gleixner
2026-03-26 12:34 ` Radu Rendec
2026-03-20 13:21 ` [patch v2 05/14] genirq: Expose nr_irqs in core code Thomas Gleixner
2026-03-23 19:48 ` Radu Rendec
2026-03-23 21:27 ` Thomas Gleixner
2026-03-20 13:21 ` [patch v2 06/14] genirq: Cache the condition for /proc/interrupts exposure Thomas Gleixner
2026-03-23 20:58 ` Radu Rendec
2026-03-24 20:31 ` Thomas Gleixner
2026-03-24 20:36 ` Radu Rendec
2026-03-20 13:21 ` [patch v2 07/14] genirq: Calculate precision only when required Thomas Gleixner
2026-03-25 19:47 ` Radu Rendec
2026-03-20 13:22 ` [patch v2 08/14] genirq: Add rcuref count to struct irq_desc Thomas Gleixner
2026-03-26 18:43 ` Dmitry Ilvokhin
2026-03-20 13:22 ` [patch v2 09/14] genirq: Expose irq_find_desc_at_or_after() in core code Thomas Gleixner
2026-03-26 19:13 ` Dmitry Ilvokhin
2026-03-26 21:11 ` Thomas Gleixner
2026-03-26 21:25 ` Thomas Gleixner
2026-03-20 13:22 ` [patch v2 10/14] genirq/proc: Speed up /proc/interrupts iteration Thomas Gleixner
2026-03-20 13:22 ` [patch v2 11/14] [RFC] genirq: Cache target CPU for single CPU affinities Thomas Gleixner
2026-03-20 13:22 ` [patch v2 12/14] [RFC] genirq/proc: Provide binary statistic interface Thomas Gleixner
2026-03-20 13:22 ` [patch v2 13/14] [RFC] genirq/proc: Provide architecture specific binary statistics Thomas Gleixner
2026-03-20 13:22 ` [patch v2 14/14] [RFC] x86/irq: Hook up architecture specific stats Thomas Gleixner
2026-03-20 16:45 ` [patch v2 00/14] Improve /proc/interrupts further and add a binary interface Michael Kelley
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox