public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi()
@ 2023-01-05  1:15 Paul E. McKenney
  2023-01-05  1:15 ` [PATCH x86/nmi 2/2] x86/nmi: Print reasons why backtrace NMIs are ignored Paul E. McKenney
  2023-01-09 16:19 ` [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi() Peter Zijlstra
  0 siblings, 2 replies; 6+ messages in thread
From: Paul E. McKenney @ 2023-01-05  1:15 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel-team, Paul E. McKenney, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, H. Peter Anvin, x86

CPUs ignoring NMIs is often a sign of those CPUs going bad, but there
are quite a few other reasons why a CPU might ignore NMIs.  Therefore,
accumulate evidence within exc_nmi() as to what might be preventing a
given CPU from responding to an NMI.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <x86@kernel.org>
---
 arch/x86/kernel/nmi.c | 31 ++++++++++++++++++++++++++++++-
 lib/Kconfig.debug     | 11 +++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index cec0bfa3bc04f..4f1651dc65b3a 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -69,6 +69,15 @@ struct nmi_stats {
 	unsigned int unknown;
 	unsigned int external;
 	unsigned int swallow;
+	unsigned long recv_jiffies;
+	unsigned long idt_seq;
+	unsigned long idt_nmi_seq;
+	unsigned long idt_ignored;
+	atomic_long_t idt_calls;
+	unsigned long idt_seq_snap;
+	unsigned long idt_nmi_seq_snap;
+	unsigned long idt_ignored_snap;
+	long idt_calls_snap;
 };
 
 static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);
@@ -479,12 +488,15 @@ static DEFINE_PER_CPU(unsigned long, nmi_dr7);
 DEFINE_IDTENTRY_RAW(exc_nmi)
 {
 	irqentry_state_t irq_state;
+	struct nmi_stats *nsp = this_cpu_ptr(&nmi_stats);
 
 	/*
 	 * Re-enable NMIs right here when running as an SEV-ES guest. This might
 	 * cause nested NMIs, but those can be handled safely.
 	 */
 	sev_es_nmi_complete();
+	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU))
+		arch_atomic_long_inc(&nsp->idt_calls);
 
 	if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
 		return;
@@ -495,6 +507,11 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
 	}
 	this_cpu_write(nmi_state, NMI_EXECUTING);
 	this_cpu_write(nmi_cr2, read_cr2());
+	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+		WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
+		WARN_ON_ONCE(!(nsp->idt_seq & 0x1));
+		WRITE_ONCE(nsp->recv_jiffies, jiffies);
+	}
 nmi_restart:
 
 	/*
@@ -509,8 +526,15 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
 
 	inc_irq_stat(__nmi_count);
 
-	if (!ignore_nmis)
+	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
+		WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
+	} else if (!ignore_nmis) {
+		WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
+		WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1));
 		default_do_nmi(regs);
+		WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
+		WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1);
+	}
 
 	irqentry_nmi_exit(regs, irq_state);
 
@@ -525,6 +549,11 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
 
 	if (user_mode(regs))
 		mds_user_clear_cpu_buffers();
+	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU)) {
+		WRITE_ONCE(nsp->idt_seq, nsp->idt_seq + 1);
+		WARN_ON_ONCE(nsp->idt_seq & 0x1);
+		WRITE_ONCE(nsp->recv_jiffies, jiffies);
+	}
 }
 
 #if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 881c3f84e88a3..dad99197d2695 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1552,6 +1552,17 @@ config TRACE_IRQFLAGS_NMI
 	depends on TRACE_IRQFLAGS
 	depends on TRACE_IRQFLAGS_NMI_SUPPORT
 
+config NMI_CHECK_CPU
+	bool "Debugging for CPUs failing to respond to backtrace requests"
+	depends on DEBUG_KERNEL
+	depends on X86
+	default n
+	help
+	  Enables debug prints when a CPU fails to respond to a given
+	  backtrace NMI.  These prints provide some reasons why a CPU
+	  might legitimately be failing to respond, for example, if it
+	  is offline or if ignore_nmis is set.
+
 config DEBUG_IRQFLAGS
 	bool "Debug IRQ flag manipulation"
 	help
-- 
2.31.1.189.g2e36527f23


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH x86/nmi 2/2] x86/nmi: Print reasons why backtrace NMIs are ignored
  2023-01-05  1:15 [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi() Paul E. McKenney
@ 2023-01-05  1:15 ` Paul E. McKenney
  2023-01-05 10:40   ` Ingo Molnar
  2023-01-09 16:19 ` [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi() Peter Zijlstra
  1 sibling, 1 reply; 6+ messages in thread
From: Paul E. McKenney @ 2023-01-05  1:15 UTC (permalink / raw)
  To: linux-kernel
  Cc: kernel-team, Paul E. McKenney, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, H. Peter Anvin, x86

Instrument nmi_trigger_cpumask_backtrace() to dump out diagnostics based
on evidence accumulated by exc_nmi().  These diagnostics are dumped for
CPUs that ignored an NMI backtrace request for more than 10 seconds.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: <x86@kernel.org>
---
 arch/x86/kernel/nmi.c | 73 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/nmi.h   |  8 +++++
 lib/nmi_backtrace.c   |  2 ++
 3 files changed, 83 insertions(+)

diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 4f1651dc65b3a..4a58ea6a806e1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -566,6 +566,79 @@ DEFINE_IDTENTRY_RAW(exc_nmi_noist)
 EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
 #endif
 
+#ifdef CONFIG_NMI_CHECK_CPU
+
+static char *nmi_check_stall_msg[] = {
+/*									*/
+/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler.		*/
+/* | +------ cpu_is_offline(cpu)					*/
+/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls):	*/
+/* | | |	NMI handler has been invoked.				*/
+/* | | |								*/
+/* V V V								*/
+/* 0 0 0 */ "NMIs are not reaching exc_nmi handler",
+/* 0 0 1 */ "exc_nmi handler is ignoring NMIs",
+/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi handler",
+/* 0 1 1 */ "CPU is offline and exc_nmi handler is legitimately ignoring NMIs",
+/* 1 0 0 */ "CPU is in exc_nmi handler and no further NMIs are reaching handler",
+/* 1 0 1 */ "CPU is in exc_nmi handler which is legitimately ignoring NMIs",
+/* 1 1 0 */ "CPU is offline in exc_nmi handler and no further NMIs are reaching exc_nmi handler",
+/* 1 1 1 */ "CPU is offline in exc_nmi handler which is legitimately ignoring NMIs",
+};
+
+void nmi_backtrace_stall_snap(const struct cpumask *btp)
+{
+	int cpu;
+	struct nmi_stats *nsp;
+
+	for_each_cpu(cpu, btp) {
+		nsp = per_cpu_ptr(&nmi_stats, cpu);
+		nsp->idt_seq_snap = READ_ONCE(nsp->idt_seq);
+		nsp->idt_nmi_seq_snap = READ_ONCE(nsp->idt_nmi_seq);
+		nsp->idt_ignored_snap = READ_ONCE(nsp->idt_ignored);
+		nsp->idt_calls_snap = atomic_long_read(&nsp->idt_calls);
+	}
+}
+
+void nmi_backtrace_stall_check(const struct cpumask *btp)
+{
+	int cpu;
+	int idx;
+	unsigned long nmi_seq;
+	unsigned long j = jiffies;
+	char *modp;
+	char *msgp;
+	char *msghp;
+	struct nmi_stats *nsp;
+
+	for_each_cpu(cpu, btp) {
+		nsp = per_cpu_ptr(&nmi_stats, cpu);
+		modp = "";
+		msghp = "";
+		nmi_seq = READ_ONCE(nsp->idt_nmi_seq);
+		if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) {
+			msgp = "CPU entered NMI handler function, but has not exited";
+		} else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) {
+			msgp = "CPU is handling NMIs";
+		} else {
+			idx = ((nsp->idt_seq_snap & 0x1) << 2) |
+			      (cpu_is_offline(cpu) << 1) |
+			      (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls));
+			msgp = nmi_check_stall_msg[idx];
+			if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1))
+				modp = ", but OK because ignore_nmis was set";
+			if (nmi_seq & ~0x1)
+				msghp = " (CPU currently in NMI handler function)";
+			else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq)
+				msghp = " (CPU exited one NMI handler function)";
+		}
+		pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n",
+			 __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies));
+	}
+}
+
+#endif
+
 void stop_nmi(void)
 {
 	ignore_nmis++;
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index f700ff2df074e..048c0b9aa623d 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -214,4 +214,12 @@ int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *);
 #include <asm/nmi.h>
 #endif
 
+#ifdef CONFIG_NMI_CHECK_CPU
+void nmi_backtrace_stall_snap(const struct cpumask *btp);
+void nmi_backtrace_stall_check(const struct cpumask *btp);
+#else
+static inline void nmi_backtrace_stall_snap(const struct cpumask *btp) {}
+static inline void nmi_backtrace_stall_check(const struct cpumask *btp) {}
+#endif
+
 #endif
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index d01aec6ae15c8..5274bbb026d79 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -64,6 +64,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
 	if (!cpumask_empty(to_cpumask(backtrace_mask))) {
 		pr_info("Sending NMI from CPU %d to CPUs %*pbl:\n",
 			this_cpu, nr_cpumask_bits, to_cpumask(backtrace_mask));
+		nmi_backtrace_stall_snap(to_cpumask(backtrace_mask));
 		raise(to_cpumask(backtrace_mask));
 	}
 
@@ -74,6 +75,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
 		mdelay(1);
 		touch_softlockup_watchdog();
 	}
+	nmi_backtrace_stall_check(to_cpumask(backtrace_mask));
 
 	/*
 	 * Force flush any remote buffers that might be stuck in IRQ context
-- 
2.31.1.189.g2e36527f23


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH x86/nmi 2/2] x86/nmi: Print reasons why backtrace NMIs are ignored
  2023-01-05  1:15 ` [PATCH x86/nmi 2/2] x86/nmi: Print reasons why backtrace NMIs are ignored Paul E. McKenney
@ 2023-01-05 10:40   ` Ingo Molnar
  2023-01-05 19:24     ` Paul E. McKenney
  0 siblings, 1 reply; 6+ messages in thread
From: Ingo Molnar @ 2023-01-05 10:40 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, kernel-team, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, H. Peter Anvin, x86


* Paul E. McKenney <paulmck@kernel.org> wrote:

> +#ifdef CONFIG_NMI_CHECK_CPU
> +
> +static char *nmi_check_stall_msg[] = {
> +/*									*/
> +/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler.		*/
> +/* | +------ cpu_is_offline(cpu)					*/
> +/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls):	*/
> +/* | | |	NMI handler has been invoked.				*/
> +/* | | |								*/
> +/* V V V								*/
> +/* 0 0 0 */ "NMIs are not reaching exc_nmi handler",
> +/* 0 0 1 */ "exc_nmi handler is ignoring NMIs",
> +/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi handler",
> +/* 0 1 1 */ "CPU is offline and exc_nmi handler is legitimately ignoring NMIs",
> +/* 1 0 0 */ "CPU is in exc_nmi handler and no further NMIs are reaching handler",
> +/* 1 0 1 */ "CPU is in exc_nmi handler which is legitimately ignoring NMIs",
> +/* 1 1 0 */ "CPU is offline in exc_nmi handler and no further NMIs are reaching exc_nmi handler",
> +/* 1 1 1 */ "CPU is offline in exc_nmi handler which is legitimately ignoring NMIs",

That kind of disambiguation of why a CPU is stuck looks really useful:

   Reviewed-by: Ingo Molnar <mingo@kernel.org>

One small suggestion would be to do this in the messages:

   s/exc_nmi handler
    /exc_nmi() handler

... to make it clear that it's a regular kernel function [well, hw entry 
handler], not a function pointer or some other indirection? No strong 
feelings though.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH x86/nmi 2/2] x86/nmi: Print reasons why backtrace NMIs are ignored
  2023-01-05 10:40   ` Ingo Molnar
@ 2023-01-05 19:24     ` Paul E. McKenney
  0 siblings, 0 replies; 6+ messages in thread
From: Paul E. McKenney @ 2023-01-05 19:24 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: linux-kernel, kernel-team, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, H. Peter Anvin, x86

On Thu, Jan 05, 2023 at 11:40:30AM +0100, Ingo Molnar wrote:
> 
> * Paul E. McKenney <paulmck@kernel.org> wrote:
> 
> > +#ifdef CONFIG_NMI_CHECK_CPU
> > +
> > +static char *nmi_check_stall_msg[] = {
> > +/*									*/
> > +/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler.		*/
> > +/* | +------ cpu_is_offline(cpu)					*/
> > +/* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls):	*/
> > +/* | | |	NMI handler has been invoked.				*/
> > +/* | | |								*/
> > +/* V V V								*/
> > +/* 0 0 0 */ "NMIs are not reaching exc_nmi handler",
> > +/* 0 0 1 */ "exc_nmi handler is ignoring NMIs",
> > +/* 0 1 0 */ "CPU is offline and NMIs are not reaching exc_nmi handler",
> > +/* 0 1 1 */ "CPU is offline and exc_nmi handler is legitimately ignoring NMIs",
> > +/* 1 0 0 */ "CPU is in exc_nmi handler and no further NMIs are reaching handler",
> > +/* 1 0 1 */ "CPU is in exc_nmi handler which is legitimately ignoring NMIs",
> > +/* 1 1 0 */ "CPU is offline in exc_nmi handler and no further NMIs are reaching exc_nmi handler",
> > +/* 1 1 1 */ "CPU is offline in exc_nmi handler which is legitimately ignoring NMIs",
> 
> That kind of disambiguation of why a CPU is stuck looks really useful:
> 
>    Reviewed-by: Ingo Molnar <mingo@kernel.org>

Glad you like it and thank you!  I will apply this on the next rebase.

> One small suggestion would be to do this in the messages:
> 
>    s/exc_nmi handler
>     /exc_nmi() handler
> 
> ... to make it clear that it's a regular kernel function [well, hw entry 
> handler], not a function pointer or some other indirection? No strong 
> feelings though.

Will do!  I would balk at "DEFINE_IDTENTRY_RAW(exc_nmi)", though.  ;-)

							Thanx, Paul

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi()
  2023-01-05  1:15 [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi() Paul E. McKenney
  2023-01-05  1:15 ` [PATCH x86/nmi 2/2] x86/nmi: Print reasons why backtrace NMIs are ignored Paul E. McKenney
@ 2023-01-09 16:19 ` Peter Zijlstra
  2023-01-09 16:59   ` Paul E. McKenney
  1 sibling, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2023-01-09 16:19 UTC (permalink / raw)
  To: Paul E. McKenney
  Cc: linux-kernel, kernel-team, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, H. Peter Anvin, x86

On Wed, Jan 04, 2023 at 05:15:36PM -0800, Paul E. McKenney wrote:

> diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
> index cec0bfa3bc04f..4f1651dc65b3a 100644
> --- a/arch/x86/kernel/nmi.c
> +++ b/arch/x86/kernel/nmi.c
> @@ -69,6 +69,15 @@ struct nmi_stats {
>  	unsigned int unknown;
>  	unsigned int external;
>  	unsigned int swallow;
> +	unsigned long recv_jiffies;
> +	unsigned long idt_seq;
> +	unsigned long idt_nmi_seq;
> +	unsigned long idt_ignored;
> +	atomic_long_t idt_calls;
> +	unsigned long idt_seq_snap;
> +	unsigned long idt_nmi_seq_snap;
> +	unsigned long idt_ignored_snap;
> +	long idt_calls_snap;
>  };

Urgh, this is more than a whole cacheline of extra data :/ Can't we make
this #ifdef CONFIG_NMI_CHECK_CPU ?

> @@ -509,8 +526,15 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
>  
>  	inc_irq_stat(__nmi_count);
>  
> -	if (!ignore_nmis)
> +	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
> +		WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
> +	} else if (!ignore_nmis) {
> +		WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
> +		WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1));
>  		default_do_nmi(regs);
> +		WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
> +		WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1);
> +	}
>  
>  	irqentry_nmi_exit(regs, irq_state);

That is not a NO-OP when !CONFIG_NMI_CHECK_CPU :/

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi()
  2023-01-09 16:19 ` [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi() Peter Zijlstra
@ 2023-01-09 16:59   ` Paul E. McKenney
  0 siblings, 0 replies; 6+ messages in thread
From: Paul E. McKenney @ 2023-01-09 16:59 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: linux-kernel, kernel-team, Thomas Gleixner, Ingo Molnar,
	Borislav Petkov, Dave Hansen, H. Peter Anvin, x86

On Mon, Jan 09, 2023 at 05:19:59PM +0100, Peter Zijlstra wrote:
> On Wed, Jan 04, 2023 at 05:15:36PM -0800, Paul E. McKenney wrote:
> 
> > diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
> > index cec0bfa3bc04f..4f1651dc65b3a 100644
> > --- a/arch/x86/kernel/nmi.c
> > +++ b/arch/x86/kernel/nmi.c
> > @@ -69,6 +69,15 @@ struct nmi_stats {
> >  	unsigned int unknown;
> >  	unsigned int external;
> >  	unsigned int swallow;
> > +	unsigned long recv_jiffies;
> > +	unsigned long idt_seq;
> > +	unsigned long idt_nmi_seq;
> > +	unsigned long idt_ignored;
> > +	atomic_long_t idt_calls;
> > +	unsigned long idt_seq_snap;
> > +	unsigned long idt_nmi_seq_snap;
> > +	unsigned long idt_ignored_snap;
> > +	long idt_calls_snap;
> >  };
> 
> Urgh, this is more than a whole cacheline of extra data :/ Can't we make
> this #ifdef CONFIG_NMI_CHECK_CPU ?

We can.  However, the new data is at the end of the structure.  Plus doing
that will also require converting the IS_ENABLED() checks to #ifdef or
to lots of tiny functions, neither of which will be at all pretty.

Another approach would be to have a #else that created a union of all
of these fields, thus reducing this unused space to that of a single
unsigned long, while still permitting IS_ENABLED() checks.  Thoughts?

> > @@ -509,8 +526,15 @@ DEFINE_IDTENTRY_RAW(exc_nmi)
> >  
> >  	inc_irq_stat(__nmi_count);
> >  
> > -	if (!ignore_nmis)
> > +	if (IS_ENABLED(CONFIG_NMI_CHECK_CPU) && ignore_nmis) {
> > +		WRITE_ONCE(nsp->idt_ignored, nsp->idt_ignored + 1);
> > +	} else if (!ignore_nmis) {
> > +		WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
> > +		WARN_ON_ONCE(!(nsp->idt_nmi_seq & 0x1));
> >  		default_do_nmi(regs);
> > +		WRITE_ONCE(nsp->idt_nmi_seq, nsp->idt_nmi_seq + 1);
> > +		WARN_ON_ONCE(nsp->idt_nmi_seq & 0x1);
> > +	}
> >  
> >  	irqentry_nmi_exit(regs, irq_state);
> 
> That is not a NO-OP when !CONFIG_NMI_CHECK_CPU :/

Good catch, will fix!

							Thanx, Paul

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-01-09 17:06 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-01-05  1:15 [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi() Paul E. McKenney
2023-01-05  1:15 ` [PATCH x86/nmi 2/2] x86/nmi: Print reasons why backtrace NMIs are ignored Paul E. McKenney
2023-01-05 10:40   ` Ingo Molnar
2023-01-05 19:24     ` Paul E. McKenney
2023-01-09 16:19 ` [PATCH x86/nmi 1/2] x86/nmi: Accumulate NMI-progress evidence in exc_nmi() Peter Zijlstra
2023-01-09 16:59   ` Paul E. McKenney

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox