From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 829822BD5B4 for ; Thu, 26 Mar 2026 21:56:27 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal:i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774562187; cv=none; b=qnvQw1KeBlZoyIrauQsTM2v+tB0buqpwjav9WttYNOy5lhsCUB5/3bn/qitg+r9pk2gqt3HFGSVgGfoR2qZbYE9JlKgElb5usLXi/j0qgJwIDgA8+AZaFjJhod3GD92uBJFxCqdjE6Lb7OqHOyikw1eOsfjZpfRzLJrNdWk8eXA= ARC-Message-Signature:i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1774562187; c=relaxed/simple; bh=RutSKvFjSpEymrwYjn1arUn3q1OK5ANqf6eGf3EicPY=; h=Date:Message-ID:From:To:Cc:Subject; b=OipgM5QWFva10RLwOA/FDW/JWOdtvBW9dxsUiIg85lsXMveaACKwfu2sRwljdeMVyn/xK4ft0IQhUkcX/NuDuZOVQc5EEIRNO1yKJw3B5tPWsDcb3MMknf30ZhQlMKaQws+yz/ZFaQJ3rFO9CTaTnUr3QvhTRNGImS1o/8tjasA= ARC-Authentication-Results:i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=WuepKL+F; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="WuepKL+F" Received: by smtp.kernel.org (Postfix) with ESMTPSA id EE2AAC116C6; Thu, 26 Mar 2026 21:56:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1774562187; bh=RutSKvFjSpEymrwYjn1arUn3q1OK5ANqf6eGf3EicPY=; h=Date:From:To:Cc:Subject:From; b=WuepKL+FDKg8W1w8VARvyONTq21WauV4KUYb7LofRjeHmR8jRcoDe8cEjh3JhEGYK rdg1xL8lJSwhTfHVWB/fMDLRxnjYpE6E6XEkG8Qd8DNYbft+MuP+05Tw8lWyN55ixu ZclEs35hvQ2wsCW2ZJcJSVbQs3Oy8K490rDR/FGw3ZW+Cv7SGqU01UnBFfjYkaSEF/ zJwYEbcVqRTTxxkBnJCrrhJOfKsL0Dw52K5wbO9AuGG3My24e5NW0CNGnglHxtljzr 
47joz3VFtPIEGFNu9iSDXevPbUfHcDHmGKQzaQXjfNcfthQ4Uu9Epn0SBtDzrStlJM jMQIDmI2LvZzg== Date: Thu, 26 Mar 2026 22:56:22 +0100 Message-ID: <20260326214345.019130211@kernel.org> User-Agent: quilt/0.68 From: Thomas Gleixner To: LKML Cc: x86@kernel.org, Michael Kelley , Dmitry Ilvokhin , Radu Rendec , Jan Kiszka , Kieran Bingham , Florian Fainelli Subject: [patch V3 00/14] Improve /proc/interrupts further Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: This is a follow-up to v2 which can be found here: https://lore.kernel.org/20260320131108.344376329@kernel.org The v1 cover letter contains a full analysis, explanation and numbers: https://lore.kernel.org/20260303150539.513068586@kernel.org TLDR: - The performance of reading /proc/interrupts has been improved piecewise over the years, but most of the low hanging fruit has been left on the table. Changes vs. V2: - Addressed the valuable review comments from Michael, Radu and Dmitry. Thanks! - Addressed the 0-day fallout (missing #ifdef guards, typos) - More updates to the x86 irq stats: - Provide a mechanism to skip the output of interrupts which should never happen by default and to remove the skip condition only once such an interrupt occurs (Spurious and ICR read retry) - Use the same mechanism to handle the IOAPIC misrouted and the PIC/APIC error counts - Updated the out of sync GDB script - pointed out by Radu - Use the new array based x86 stats - Ensure visually tabular output - Reworked the 'first line' mechanism by using proc_create_seq_private() which also simplifies the precision and chip name width adjustments - Made the output format visually tabular in /proc/interrupts - Picked up tags where appropriate - Dropped the binary interface parts as they were RFC and just for demonstration. Let's see if anyone cares down the road. - Tagged the series so the irq/core branch can be updated without losing the submitted content. Delta patch against v2 (w/o the binary RFC part) is below. 
The series applies on top of v7.0-rc3 and is also available via git: git://git.kernel.org/pub/scm/linux/kernel/git/tglx/devel.git irq-proc-v3 Thanks, tglx --- diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index d68f587f0b7d..305774b67995 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -1032,7 +1032,7 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) * Unmasking the LVTPC is not required as the Mask (M) bit of the LVT * PMI entry is not set by the local APIC when a PMC overflow occurs */ - inc_irq_stat(APIC_PERF); + inc_perf_irq_stat(); done: cpuc->enabled = pmu_enabled; diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index b22259b54685..0e36f5580e8d 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -1403,7 +1403,7 @@ perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs) handled += perf_ibs_handle_irq(&perf_ibs_op, regs); if (handled) - inc_irq_stat(APIC_PERF); + inc_perf_irq_stat(); perf_sample_event_took(sched_clock() - stamp); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index ce9f9d4cd5fd..e1e9aaa4f11a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1747,7 +1747,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) } if (handled) - inc_irq_stat(APIC_PERF); + inc_perf_irq_stat(); return handled; } diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index e5c85c5c8f87..297916f29c09 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3504,7 +3504,7 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) int bit; int handled = 0; - inc_irq_stat(APIC_PERF); + inc_perf_irq_stat(); /* * Ignore a range of extra bits in status that do not indicate diff --git a/arch/x86/events/intel/knc.c b/arch/x86/events/intel/knc.c index 537838404524..e887adc108ac 100644 --- a/arch/x86/events/intel/knc.c +++ b/arch/x86/events/intel/knc.c @@ -238,7 +238,7 @@ static int 
knc_pmu_handle_irq(struct pt_regs *regs) goto done; } - inc_irq_stat(APIC_PERF); + inc_perf_irq_stat(); for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c index 1f7621938494..12bf293d42a5 100644 --- a/arch/x86/events/intel/p4.c +++ b/arch/x86/events/intel/p4.c @@ -1077,7 +1077,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) } if (handled) - inc_irq_stat(APIC_PERF); + inc_perf_irq_stat(); /* * When dealing with the unmasking of the LVTPC on P4 perf hw, it has diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c index f1a5d0347b08..4bc177badac2 100644 --- a/arch/x86/events/zhaoxin/core.c +++ b/arch/x86/events/zhaoxin/core.c @@ -373,7 +373,7 @@ static int zhaoxin_pmu_handle_irq(struct pt_regs *regs) else zhaoxin_pmu_ack_status(status); - inc_irq_stat(APIC_PERF); + inc_perf_irq_stat(); /* * CondChgd bit 63 doesn't mean any overflow status. 
Ignore diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index dcc96edb4f82..dea60d66d976 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -4,7 +4,7 @@ #include -enum { +enum irq_stat_counts { IRQ_COUNT_NMI, #ifdef CONFIG_X86_LOCAL_APIC IRQ_COUNT_APIC_TIMER, @@ -49,6 +49,10 @@ enum { #endif #ifdef CONFIG_X86_POSTED_MSI IRQ_COUNT_POSTED_MSI_NOTIFICATION, +#endif + IRQ_COUNT_PIC_APIC_ERROR, +#ifdef CONFIG_X86_IO_APIC + IRQ_COUNT_IOAPIC_MISROUTED, #endif IRQ_COUNT_MAX, }; @@ -68,14 +72,20 @@ DECLARE_PER_CPU_ALIGNED(struct pi_desc, posted_msi_pi_desc); #define __ARCH_IRQ_STAT #define inc_irq_stat(index) this_cpu_inc(irq_stat.counts[IRQ_COUNT_##index]) +void irq_stat_inc_and_enable(enum irq_stat_counts which); + +#ifdef CONFIG_X86_LOCAL_APIC +#define inc_perf_irq_stat() inc_irq_stat(APIC_PERF) +#else +#define inc_perf_irq_stat() do { } while (0) +#endif extern void ack_bad_irq(unsigned int irq); +#ifdef CONFIG_PROC_FS extern u64 arch_irq_stat_cpu(unsigned int cpu); #define arch_irq_stat_cpu arch_irq_stat_cpu - -extern u64 arch_irq_stat(void); -#define arch_irq_stat arch_irq_stat +#endif DECLARE_PER_CPU_CACHE_HOT(u16, __softirq_pending); #define local_softirq_pending_ref __softirq_pending diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index cbe19e669080..47727d0b540b 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -110,10 +110,6 @@ static inline void lock_vector_lock(void) {} static inline void unlock_vector_lock(void) {} #endif -/* Statistics */ -extern atomic_t irq_err_count; -extern atomic_t irq_mis_count; - extern void elcr_set_level_irq(unsigned int irq); extern char irq_entries_start[]; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e2de95b0862a..e4b19a288ee7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2108,7 +2108,7 @@ static noinline void handle_spurious_interrupt(u8 vector) 
trace_spurious_apic_entry(vector); - inc_irq_stat(SPURIOUS); + irq_stat_inc_and_enable(IRQ_COUNT_SPURIOUS); /* * If this is a spurious interrupt then do not acknowledge @@ -2180,7 +2180,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_error_interrupt) apic_write(APIC_ESR, 0); v = apic_read(APIC_ESR); apic_eoi(); - atomic_inc(&irq_err_count); + irq_stat_inc_and_enable(IRQ_COUNT_PIC_APIC_ERROR); apic_pr_debug("APIC error on CPU%d: %02x", smp_processor_id(), v); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 352ed5558cbc..7d7175d01228 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1575,8 +1575,6 @@ static unsigned int startup_ioapic_irq(struct irq_data *data) return was_pending; } -atomic_t irq_mis_count; - #ifdef CONFIG_GENERIC_PENDING_IRQ static bool io_apic_level_ack_pending(struct mp_chip_data *data) { @@ -1713,7 +1711,7 @@ static void ioapic_ack_level(struct irq_data *irq_data) * at the cpu. */ if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); + irq_stat_inc_and_enable(IRQ_COUNT_IOAPIC_MISROUTED); eoi_ioapic_pin(cfg->vector, irq_data->chip_data); } diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 3635c4d7b7f5..c627bee3b14f 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -120,7 +120,7 @@ u32 apic_mem_wait_icr_idle_timeout(void) for (cnt = 0; cnt < 1000; cnt++) { if (!(apic_read(APIC_ICR) & APIC_ICR_BUSY)) return 0; - inc_irq_stat(ICR_READ_RETRY); + irq_stat_inc_and_enable(IRQ_COUNT_ICR_READ_RETRY); udelay(100); } return APIC_ICR_BUSY; diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index f67063df6723..f7a86b94a0dd 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -214,7 +214,7 @@ static void mask_and_ack_8259A(struct irq_data *data) "spurious 8259A interrupt: IRQ%d.\n", irq); spurious_irq_mask |= irqmask; } - atomic_inc(&irq_err_count); + irq_stat_inc_and_enable(IRQ_COUNT_PIC_APIC_ERROR); /* * 
Theoretically we do not have to handle this IRQ, * but in Linux this does not cause problems and is diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 2bd8c08f8d91..0b3723cec0b9 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -39,8 +39,6 @@ EXPORT_PER_CPU_SYMBOL(__softirq_pending); DEFINE_PER_CPU_CACHE_HOT(struct irq_stack *, hardirq_stack_ptr); -atomic_t irq_err_count; - /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. @@ -68,56 +66,65 @@ struct irq_stat_info { const char *text; }; +#define DEFAULT_SUPPRESSED_VECTOR UINT_MAX + #define ISS(idx, sym, txt) [IRQ_COUNT_##idx] = { .symbol = sym, .text = txt } #define ITS(idx, sym, txt) [IRQ_COUNT_##idx] = \ { .skip_vector = idx## _VECTOR, .symbol = sym, .text = txt } +#define IDS(idx, sym, txt) [IRQ_COUNT_##idx] = \ + { .skip_vector = DEFAULT_SUPPRESSED_VECTOR, .symbol = sym, .text = txt } + static struct irq_stat_info irq_stat_info[IRQ_COUNT_MAX] __ro_after_init = { - ISS(NMI, "NMI", " Non-maskable interrupts\n"), + ISS(NMI, "NMI", " Non-maskable interrupts\n"), #ifdef CONFIG_X86_LOCAL_APIC - ISS(APIC_TIMER, "LOC", " Local timer interrupts\n"), - ISS(SPURIOUS, "SPU", " Spurious interrupts\n"), - ISS(APIC_PERF, "PMI", " Performance monitoring interrupts\n"), - ISS(IRQ_WORK, "IWI", " IRQ work interrupts\n"), - ISS(ICR_READ_RETRY, "RTR", " APIC ICR read retries\n"), - ISS(X86_PLATFORM_IPI, "PLT", " Platform interrupts\n"), + ISS(APIC_TIMER, "LOC", " Local timer interrupts\n"), + IDS(SPURIOUS, "SPU", " Spurious interrupts\n"), + ISS(APIC_PERF, "PMI", " Performance monitoring interrupts\n"), + ISS(IRQ_WORK, "IWI", " IRQ work interrupts\n"), + IDS(ICR_READ_RETRY, "RTR", " APIC ICR read retries\n"), + ISS(X86_PLATFORM_IPI, "PLT", " Platform interrupts\n"), #endif #ifdef CONFIG_SMP - ISS(RESCHEDULE, "RES", " Rescheduling interrupts\n"), - ISS(CALL_FUNCTION, "CAL", " Function call interrupts\n"), + ISS(RESCHEDULE, "RES", " 
Rescheduling interrupts\n"), + ISS(CALL_FUNCTION, "CAL", " Function call interrupts\n"), #endif - ISS(TLB, "TLB", " TLB shootdowns\n"), + ISS(TLB, "TLB", " TLB shootdowns\n"), #ifdef CONFIG_X86_THERMAL_VECTOR - ISS(THERMAL_APIC, "TRM", " Thermal event interrupt\n"), + ISS(THERMAL_APIC, "TRM", " Thermal event interrupt\n"), #endif #ifdef CONFIG_X86_MCE_THRESHOLD - ISS(THRESHOLD_APIC, "THR", " Threshold APIC interrupts\n"), + ISS(THRESHOLD_APIC, "THR", " Threshold APIC interrupts\n"), #endif #ifdef CONFIG_X86_MCE_AMD - ISS(DEFERRED_ERROR, "DFR", " Deferred Error APIC interrupts\n"), + ISS(DEFERRED_ERROR, "DFR", " Deferred Error APIC interrupts\n"), #endif #ifdef CONFIG_X86_MCE - ISS(MCE_EXCEPTION, "MCE", " Machine check exceptions\n"), - ISS(MCE_POLL, "MCP", " Machine check polls\n"), + ISS(MCE_EXCEPTION, "MCE", " Machine check exceptions\n"), + ISS(MCE_POLL, "MCP", " Machine check polls\n"), #endif #ifdef CONFIG_X86_HV_CALLBACK_VECTOR - ITS(HYPERVISOR_CALLBACK, "HYP", " Hypervisor callback interrupts\n"), + ITS(HYPERVISOR_CALLBACK, "HYP", " Hypervisor callback interrupts\n"), #endif #if IS_ENABLED(CONFIG_HYPERV) - ITS(HYPERV_REENLIGHTENMENT, "HRE", " Hyper-V reenlightment interrupts\n"), - ITS(HYPERV_STIMER0, "HVS", " Hyper-V stimer0 interrupts\n"), + ITS(HYPERV_REENLIGHTENMENT, "HRE", " Hyper-V reenlightenment interrupts\n"), + ITS(HYPERV_STIMER0, "HVS", " Hyper-V stimer0 interrupts\n"), #endif #if IS_ENABLED(CONFIG_KVM) - ITS(POSTED_INTR, "PIN", " Posted-interrupt notification event\n"), - ITS(POSTED_INTR_NESTED, "NPI", " Nested posted-interrupt event\n"), - ITS(POSTED_INTR_WAKEUP, "PIW", " Posted-interrupt wakeup event\n"), + ITS(POSTED_INTR, "PIN", " Posted-interrupt notification event\n"), + ITS(POSTED_INTR_NESTED, "NPI", " Nested posted-interrupt event\n"), + ITS(POSTED_INTR_WAKEUP, "PIW", " Posted-interrupt wakeup event\n"), #endif #ifdef CONFIG_GUEST_PERF_EVENTS ISS(PERF_GUEST_MEDIATED_PMI, "VPMI", " Perf Guest Mediated PMI\n"), #endif #ifdef 
CONFIG_X86_POSTED_MSI - ISS(POSTED_MSI_NOTIFICATION, "PMN", " Posted MSI notification event\n"), + ISS(POSTED_MSI_NOTIFICATION, "PMN", " Posted MSI notification event\n"), +#endif + IDS(PIC_APIC_ERROR, "ERR", " PIC/APIC error interrupts\n"), +#ifdef CONFIG_X86_IO_APIC + IDS(IOAPIC_MISROUTED, "MIS", " Misrouted IO/APIC interrupts\n"), #endif }; @@ -126,19 +133,34 @@ void __init irq_init_stats(void) struct irq_stat_info *info = irq_stat_info; for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) { - if (info->skip_vector && test_bit(info->skip_vector, system_vectors)) + if (info->skip_vector && info->skip_vector != DEFAULT_SUPPRESSED_VECTOR && + test_bit(info->skip_vector, system_vectors)) info->skip_vector = 0; } +#ifdef CONFIG_X86_LOCAL_APIC if (!x86_platform_ipi_callback) irq_stat_info[IRQ_COUNT_X86_PLATFORM_IPI].skip_vector = 1; +#endif #ifdef CONFIG_X86_POSTED_MSI if (!posted_msi_enabled()) - irq_stat_info[IRQ_COUNT_X86_POSTED_MSI].skip_vector = 1; + irq_stat_info[IRQ_COUNT_POSTED_MSI_NOTIFICATION].skip_vector = 1; #endif } +/* + * Used for default enabled counters to increment the stats and to enable the + * entry for /proc/interrupts output. 
+ */ +void irq_stat_inc_and_enable(enum irq_stat_counts which) +{ + this_cpu_inc(irq_stat.counts[which]); + /* Pairs with the READ_ONCE() in arch_show_interrupts() */ + WRITE_ONCE(irq_stat_info[which].skip_vector, 0); +} + +#ifdef CONFIG_PROC_FS /* * /proc/interrupts printing for arch specific interrupts */ @@ -147,17 +169,13 @@ int arch_show_interrupts(struct seq_file *p, int prec) const struct irq_stat_info *info = irq_stat_info; for (unsigned int i = 0; i < ARRAY_SIZE(irq_stat_info); i++, info++) { - if (info->skip_vector) + if (READ_ONCE(info->skip_vector)) continue; seq_printf(p, "%*s:", prec, info->symbol); irq_proc_emit_counts(p, &irq_stat.counts[i]); seq_puts(p, info->text); } - - seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); - if (IS_ENABLED(CONFIG_X86_IO_APIC)) - seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); return 0; } @@ -173,12 +191,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) sum += p->counts[i]; return sum; } - -u64 arch_irq_stat(void) -{ - u64 sum = atomic_read(&irq_err_count); - return sum; -} +#endif /* CONFIG_PROC_FS */ static __always_inline void handle_irq(struct irq_desc *desc, struct pt_regs *regs) diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 8b444e862319..20c3df9a9b80 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -18,9 +18,6 @@ #ifndef arch_irq_stat_cpu #define arch_irq_stat_cpu(cpu) 0 #endif -#ifndef arch_irq_stat -#define arch_irq_stat() 0 -#endif u64 get_idle_time(struct kernel_cpustat *kcs, int cpu) { @@ -122,7 +119,6 @@ static int show_stat(struct seq_file *p, void *v) sum_softirq += softirq_stat; } } - sum += arch_irq_stat(); seq_put_decimal_ull(p, "cpu ", nsec_to_clock_t(user)); seq_put_decimal_ull(p, " ", nsec_to_clock_t(nice)); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6389a462c731..2809f0fc4175 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -46,9 +46,11 @@ int irq_set_chip(unsigned int irq, const struct irq_chip *chip) 
scoped_irqdesc->irq_data.chip = (struct irq_chip *)(chip ?: &no_irq_chip); ret = 0; } - /* For !CONFIG_SPARSE_IRQ make the irq show up in allocated_irqs. */ - if (!ret) + if (!ret) { + /* For !CONFIG_SPARSE_IRQ make the irq show up in allocated_irqs. */ irq_mark_irq(irq); + irq_proc_update_chip(chip); + } return ret; } EXPORT_SYMBOL(irq_set_chip); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 37eec0337867..7fbf003c6e93 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -12,6 +12,8 @@ #include #include +#include "proc.h" + #ifdef CONFIG_SPARSE_IRQ # define MAX_SPARSE_IRQS INT_MAX #else @@ -149,12 +151,6 @@ static inline void unregister_handler_proc(unsigned int irq, static inline void irq_proc_update_valid(struct irq_desc *desc) { } #endif -#if defined(CONFIG_PROC_FS) && defined(CONFIG_GENERIC_IRQ_SHOW) -void irq_proc_calc_prec(void); -#else -static inline void irq_proc_calc_prec(void) { } -#endif - struct irq_desc *irq_find_desc_at_or_after(unsigned int offset); extern bool irq_can_set_affinity_usr(unsigned int irq); diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 9b9a75dfeebd..80ef4e27dcf4 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -185,6 +185,7 @@ struct irq_desc *irq_find_desc_at_or_after(unsigned int offset) { unsigned long index = offset; + lockdep_assert_in_rcu_read_lock(); return mt_find(&sparse_irqs, &index, total_nr_irqs); } @@ -930,8 +931,10 @@ EXPORT_SYMBOL_GPL(__irq_alloc_descs); */ unsigned int irq_get_next_irq(unsigned int offset) { - struct irq_desc *desc = irq_find_desc_at_or_after(offset); + struct irq_desc *desc; + guard(rcu)(); + desc = irq_find_desc_at_or_after(offset); return desc ? 
irq_desc_get_irq(desc) : total_nr_irqs; } diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index cc93abf009e8..9f524ed709b8 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -20,6 +20,8 @@ #include #include +#include "proc.h" + static LIST_HEAD(irq_domain_list); static DEFINE_MUTEX(irq_domain_mutex); @@ -1532,6 +1534,7 @@ int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, irq_data->chip = (struct irq_chip *)(chip ? chip : &no_irq_chip); irq_data->chip_data = chip_data; + irq_proc_update_chip(chip); return 0; } EXPORT_SYMBOL_GPL(irq_domain_set_hwirq_and_chip); diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index f6cdb262d1e7..a62d4694f063 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -443,8 +443,7 @@ void irq_proc_update_valid(struct irq_desc *desc) { u32 set = _IRQ_PROC_VALID; - if (irq_settings_is_hidden(desc) || !desc->action || - irq_desc_is_chained(desc) || !desc->kstat_irqs) + if (irq_settings_is_hidden(desc) || irq_desc_is_chained(desc) || !desc->action) set = 0; irq_settings_update_proc_valid(desc, set); @@ -459,7 +458,16 @@ int __weak arch_show_interrupts(struct seq_file *p, int prec) return 0; } -static int irq_num_prec __read_mostly = 3; +static DEFINE_RAW_SPINLOCK(irq_proc_constraints_lock); + +static struct irq_proc_constraints { + bool print_header; + unsigned int num_prec; + unsigned int chip_width; +} irq_proc_constraints __read_mostly = { + .num_prec = 3, + .chip_width = 8, +}; #ifndef ACTUAL_NR_IRQS # define ACTUAL_NR_IRQS total_nr_irqs @@ -471,7 +479,23 @@ void irq_proc_calc_prec(void) for (prec = 3, n = 1000; prec < 10 && n <= total_nr_irqs; ++prec) n *= 10; - WRITE_ONCE(irq_num_prec, prec); + + guard(raw_spinlock_irqsave)(&irq_proc_constraints_lock); + if (prec > irq_proc_constraints.num_prec) + WRITE_ONCE(irq_proc_constraints.num_prec, prec); +} + +void irq_proc_update_chip(const struct irq_chip *chip) +{ + unsigned int len = chip && chip->name ? 
strlen(chip->name) : 0; + + if (!len || len <= READ_ONCE(irq_proc_constraints.chip_width)) + return; + + /* Can be invoked from interrupt disabled contexts */ + guard(raw_spinlock_irqsave)(&irq_proc_constraints_lock); + if (len > irq_proc_constraints.chip_width) + WRITE_ONCE(irq_proc_constraints.chip_width, len); } #define ZSTR1 " 0" @@ -512,26 +536,25 @@ void irq_proc_emit_counts(struct seq_file *p, unsigned int __percpu *cnts) static int irq_seq_show(struct seq_file *p, void *v) { - int prec = (int)(unsigned long)p->private; + struct irq_proc_constraints *constr = p->private; struct irq_desc *desc = v; struct irqaction *action; if (desc == ARCH_PROC_IRQDESC) - return arch_show_interrupts(p, prec); + return arch_show_interrupts(p, constr->num_prec); /* print header for the first interrupt indicated by !p>private */ - if (!prec) { + if (constr->print_header) { unsigned int cpu; - prec = READ_ONCE(irq_num_prec); - seq_printf(p, "%*s", prec + 8, ""); + seq_printf(p, "%*s", constr->num_prec + 8, ""); for_each_online_cpu(cpu) seq_printf(p, "CPU%-8d", cpu); seq_putc(p, '\n'); - p->private = (void *)(unsigned long)prec; + constr->print_header = false; } - seq_put_decimal_ull_width(p, "", irq_desc_get_irq(desc), prec); + seq_put_decimal_ull_width(p, "", irq_desc_get_irq(desc), constr->num_prec); seq_putc(p, ':'); /* @@ -543,25 +566,27 @@ static int irq_seq_show(struct seq_file *p, void *v) irq_proc_emit_counts(p, &desc->kstat_irqs->cnt); else irq_proc_emit_zero_counts(p, num_online_cpus()); - seq_putc(p, ' '); + + /* Enforce a visual gap */ + seq_write(p, " ", 2); guard(raw_spinlock_irq)(&desc->lock); if (desc->irq_data.chip) { if (desc->irq_data.chip->irq_print_chip) desc->irq_data.chip->irq_print_chip(&desc->irq_data, p); else if (desc->irq_data.chip->name) - seq_printf(p, "%8s", desc->irq_data.chip->name); + seq_printf(p, "%-*s", constr->chip_width, desc->irq_data.chip->name); else - seq_printf(p, "%8s", "-"); + seq_printf(p, "%-*s", constr->chip_width, "-"); } else { 
- seq_printf(p, "%8s", "None"); + seq_printf(p, "%-*s", constr->chip_width, "None"); } seq_putc(p, ' '); if (desc->irq_data.domain) - seq_put_decimal_ull_width(p, "", desc->irq_data.hwirq, prec); + seq_put_decimal_ull_width(p, "", desc->irq_data.hwirq, constr->num_prec); else - seq_printf(p, " %*s", prec, ""); + seq_printf(p, " %*s", constr->num_prec, ""); if (IS_ENABLED(CONFIG_GENERIC_IRQ_SHOW_LEVEL)) seq_printf(p, " %-8s", irqd_is_level_type(&desc->irq_data) ? "Level" : "Edge"); @@ -582,14 +607,13 @@ static int irq_seq_show(struct seq_file *p, void *v) static void *irq_seq_next_desc(loff_t *pos) { - struct irq_desc *desc; - if (*pos > total_nr_irqs) return NULL; guard(rcu)(); for (;;) { - desc = irq_find_desc_at_or_after((unsigned int) *pos); + struct irq_desc *desc = irq_find_desc_at_or_after((unsigned int) *pos); + if (desc) { *pos = irq_desc_get_irq(desc); /* @@ -609,8 +633,13 @@ static void *irq_seq_next_desc(loff_t *pos) static void *irq_seq_start(struct seq_file *f, loff_t *pos) { - if (!*pos) - f->private = NULL; + if (!*pos) { + struct irq_proc_constraints *constr = f->private; + + constr->num_prec = READ_ONCE(irq_proc_constraints.num_prec); + constr->chip_width = READ_ONCE(irq_proc_constraints.chip_width); + constr->print_header = true; + } return irq_seq_next_desc(pos); } @@ -638,7 +667,8 @@ static const struct seq_operations irq_seq_ops = { static int __init irq_proc_init(void) { - proc_create_seq("interrupts", 0, NULL, &irq_seq_ops); + proc_create_seq_private("interrupts", 0, NULL, &irq_seq_ops, + sizeof(irq_proc_constraints), NULL); return 0; } fs_initcall(irq_proc_init); diff --git a/kernel/irq/proc.h b/kernel/irq/proc.h new file mode 100644 index 000000000000..ec9173d573f9 --- /dev/null +++ b/kernel/irq/proc.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#if defined(CONFIG_PROC_FS) && defined(CONFIG_GENERIC_IRQ_SHOW) +void irq_proc_calc_prec(void); +void irq_proc_update_chip(const struct irq_chip *chip); +#else +static inline void 
irq_proc_calc_prec(void) { } +static inline void irq_proc_update_chip(const struct irq_chip *chip) { } +#endif diff --git a/scripts/gdb/linux/interrupts.py b/scripts/gdb/linux/interrupts.py index f4f715a8f0e3..6ca7e32f35b0 100644 --- a/scripts/gdb/linux/interrupts.py +++ b/scripts/gdb/linux/interrupts.py @@ -20,7 +20,7 @@ def irq_desc_is_chained(desc): def irqd_is_level(desc): return desc['irq_data']['common']['state_use_accessors'] & constants.LX_IRQD_LEVEL -def show_irq_desc(prec, irq): +def show_irq_desc(prec, chip_width, irq): text = "" desc = mapletree.mtree_load(gdb.parse_and_eval("&sparse_irqs"), irq) @@ -48,7 +48,7 @@ def show_irq_desc(prec, irq): count = cpus.per_cpu(desc['kstat_irqs'], cpu)['cnt'] else: count = 0 - text += "%10u" % (count) + text += "%10u " % (count) name = "None" if desc['irq_data']['chip']: @@ -58,7 +58,7 @@ def show_irq_desc(prec, irq): else: name = "-" - text += " %8s" % (name) + text += " %-*s" % (chip_width, name) if desc['irq_data']['domain']: text += " %*lu" % (prec, desc['irq_data']['hwirq']) @@ -97,52 +97,26 @@ def show_irq_err_count(prec): text += "%*s: %10u\n" % (prec, "ERR", cnt['counter']) return text -def x86_show_irqstat(prec, pfx, field, desc): - irq_stat = gdb.parse_and_eval("&irq_stat") +def x86_show_irqstat(prec, pfx, idx, desc): + irq_stat = gdb.parse_and_eval("&irq_stat.counts[%d]" %idx) text = "%*s: " % (prec, pfx) for cpu in cpus.each_online_cpu(): stat = cpus.per_cpu(irq_stat, cpu) - text += "%10u " % (stat[field]) - text += " %s\n" % (desc) - return text - -def x86_show_mce(prec, var, pfx, desc): - pvar = gdb.parse_and_eval(var) - text = "%*s: " % (prec, pfx) - for cpu in cpus.each_online_cpu(): - text += "%10u " % (cpus.per_cpu(pvar, cpu).dereference()) - text += " %s\n" % (desc) + text += "%10u " % (stat.dereference()) + text += desc return text def x86_show_interupts(prec): - text = x86_show_irqstat(prec, "NMI", '__nmi_count', 'Non-maskable interrupts') - - if constants.LX_CONFIG_X86_LOCAL_APIC: - text += 
x86_show_irqstat(prec, "LOC", 'apic_timer_irqs', "Local timer interrupts") - text += x86_show_irqstat(prec, "SPU", 'irq_spurious_count', "Spurious interrupts") - text += x86_show_irqstat(prec, "PMI", 'apic_perf_irqs', "Performance monitoring interrupts") - text += x86_show_irqstat(prec, "IWI", 'apic_irq_work_irqs', "IRQ work interrupts") - text += x86_show_irqstat(prec, "RTR", 'icr_read_retry_count', "APIC ICR read retries") - if utils.gdb_eval_or_none("x86_platform_ipi_callback") is not None: - text += x86_show_irqstat(prec, "PLT", 'x86_platform_ipis', "Platform interrupts") - - if constants.LX_CONFIG_SMP: - text += x86_show_irqstat(prec, "RES", 'irq_resched_count', "Rescheduling interrupts") - text += x86_show_irqstat(prec, "CAL", 'irq_call_count', "Function call interrupts") - text += x86_show_irqstat(prec, "TLB", 'irq_tlb_count', "TLB shootdowns") - - if constants.LX_CONFIG_X86_THERMAL_VECTOR: - text += x86_show_irqstat(prec, "TRM", 'irq_thermal_count', "Thermal events interrupts") + info_type = gdb.lookup_type('struct irq_stat_info') + info = gdb.parse_and_eval('irq_stat_info') - if constants.LX_CONFIG_X86_MCE_THRESHOLD: - text += x86_show_irqstat(prec, "THR", 'irq_threshold_count', "Threshold APIC interrupts") - - if constants.LX_CONFIG_X86_MCE_AMD: - text += x86_show_irqstat(prec, "DFR", 'irq_deferred_error_count', "Deferred Error APIC interrupts") - - if constants.LX_CONFIG_X86_MCE: - text += x86_show_mce(prec, "&mce_exception_count", "MCE", "Machine check exceptions") - text += x86_show_mce(prec, "&mce_poll_count", "MCP", "Machine check polls") + text = "" + for idx in range(int(info.type.sizeof / info_type.sizeof)): + if info[idx]['skip_vector']: + continue + pfx = info[idx]['symbol'].string() + desc = info[idx]['text'].string() + text += x86_show_irqstat(prec, pfx, idx, desc) text += show_irq_err_count(prec) @@ -151,11 +125,6 @@ def x86_show_interupts(prec): if cnt is not None: text += "%*s: %10u\n" % (prec, "MIS", cnt['counter']) - if 
constants.LX_CONFIG_KVM: - text += x86_show_irqstat(prec, "PIN", 'kvm_posted_intr_ipis', 'Posted-interrupt notification event') - text += x86_show_irqstat(prec, "NPI", 'kvm_posted_intr_nested_ipis', 'Nested posted-interrupt event') - text += x86_show_irqstat(prec, "PIW", 'kvm_posted_intr_wakeup_ipis', 'Posted-interrupt wakeup event') - return text def arm_common_show_interrupts(prec): @@ -209,12 +178,19 @@ class LxInterruptList(gdb.Command): super(LxInterruptList, self).__init__("lx-interruptlist", gdb.COMMAND_DATA) def invoke(self, arg, from_tty): - nr_irqs = gdb.parse_and_eval("nr_irqs") - prec = 3 - j = 1000 - while prec < 10 and j <= nr_irqs: - prec += 1 - j *= 10 + nr_irqs = gdb.parse_and_eval("total_nr_irqs") + constr = utils.gdb_eval_or_none('irq_proc_constraints') + + if constr: + prec = int(constr['num_prec']) + chip_width = int(constr['chip_width']) + else: + prec = 3 + j = 1000 + while prec < 10 and j <= nr_irqs: + prec += 1 + j *= 10 + chip_width = 8 gdb.write("%*s" % (prec + 8, "")) for cpu in cpus.each_online_cpu(): @@ -225,7 +201,7 @@ class LxInterruptList(gdb.Command): raise gdb.GdbError("Unable to find the sparse IRQ tree, is CONFIG_SPARSE_IRQ enabled?") for irq in range(nr_irqs): - gdb.write(show_irq_desc(prec, irq)) + gdb.write(show_irq_desc(prec, chip_width, irq)) gdb.write(arch_show_interrupts(prec))