From: Andrew Cooper <andrew.cooper3@citrix.com>
To: xen-devel@lists.xen.org
Subject: Re: [PATCH] x86/nmi: Make external NMI injection reliably crash the host
Date: Tue, 26 Aug 2014 11:17:49 +0100 [thread overview]
Message-ID: <53FC5ECD.9070205@citrix.com> (raw)
In-Reply-To: <1409047805-17893-1-git-send-email-ross.lagerwall@citrix.com>
On 26/08/14 11:10, Ross Lagerwall wrote:
> Change the watchdog handler to only "tick" if the corresponding perf
> counter has overflowed; otherwise, return false from the NMI handler to
> indicate that the NMI is not a watchdog tick and let the other handlers
> handle it. This allows externally injected NMIs to reliably crash the
> host rather than be swallowed by the watchdog handler.
More specifically, external NMIs which don't set one of the bits in the
System Control Port B
Most vendors which support an "Inject NMI" option from their ipmi/web
interface do not set any bits in control port B, at which point the
attempt to crash the server will be eaten by the watchdog logic.
>
> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
> CC: Keir Fraser <keir@xen.org>
> CC: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
> ---
> xen/arch/x86/nmi.c | 76 ++++++++++++++++++++++++++++++++--------------
> xen/arch/x86/traps.c | 6 ++--
> xen/include/asm-x86/apic.h | 2 +-
> 3 files changed, 57 insertions(+), 27 deletions(-)
>
> diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
> index c4427a6..5bf0e2c 100644
> --- a/xen/arch/x86/nmi.c
> +++ b/xen/arch/x86/nmi.c
> @@ -15,6 +15,7 @@
>
> #include <xen/config.h>
> #include <xen/init.h>
> +#include <xen/stdbool.h>
> #include <xen/lib.h>
> #include <xen/mm.h>
> #include <xen/irq.h>
> @@ -82,6 +83,7 @@ int nmi_active;
> #define K7_EVNTSEL_USR (1 << 16)
> #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
> #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
> +#define K7_EVENT_WIDTH 32
>
> #define P6_EVNTSEL0_ENABLE (1 << 22)
> #define P6_EVNTSEL_INT (1 << 20)
> @@ -89,10 +91,12 @@ int nmi_active;
> #define P6_EVNTSEL_USR (1 << 16)
> #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
> #define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c
> +#define P6_EVENT_WIDTH 32
>
> #define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
> #define P4_CCCR_OVF_PMI0 (1<<26)
> #define P4_CCCR_OVF_PMI1 (1<<27)
> +#define P4_CCCR_OVF (1<<31)
> #define P4_CCCR_THRESHOLD(N) ((N)<<20)
> #define P4_CCCR_COMPLEMENT (1<<19)
> #define P4_CCCR_COMPARE (1<<18)
> @@ -432,35 +436,23 @@ int __init watchdog_setup(void)
> return 0;
> }
>
> -void nmi_watchdog_tick(const struct cpu_user_regs *regs)
> +/* Returns true if this was a watchdog NMI, false otherwise */
> +bool_t nmi_watchdog_tick(const struct cpu_user_regs *regs)
> {
> + bool_t watchdog_tick = 0;
> unsigned int sum = this_cpu(nmi_timer_ticks);
>
> - if ( (this_cpu(last_irq_sums) == sum) && watchdog_enabled() )
> - {
> - /*
> - * Ayiee, looks like this CPU is stuck ... wait for the timeout
> - * before doing the oops ...
> - */
> - this_cpu(alert_counter)++;
> - if ( this_cpu(alert_counter) == opt_watchdog_timeout*nmi_hz )
> - {
> - console_force_unlock();
> - printk("Watchdog timer detects that CPU%d is stuck!\n",
> - smp_processor_id());
> - fatal_trap(TRAP_nmi, regs);
> - }
> - }
> - else
> - {
> - this_cpu(last_irq_sums) = sum;
> - this_cpu(alert_counter) = 0;
> - }
> -
> if ( nmi_perfctr_msr )
> {
> + uint64_t msr_content;
> +
> + /* Work out if this is a watchdog tick by checking for overflow. */
> if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 )
> {
> + rdmsrl(MSR_P4_IQ_CCCR0, msr_content);
> + if ( msr_content & P4_CCCR_OVF )
> + watchdog_tick = 1;
> +
> /*
> * P4 quirks:
> * - An overflown perfctr will assert its interrupt
> @@ -473,14 +465,52 @@ void nmi_watchdog_tick(const struct cpu_user_regs *regs)
> }
> else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 )
> {
> + rdmsrl(MSR_P6_PERFCTR0, msr_content);
> + watchdog_tick = !(msr_content & (1ULL << P6_EVENT_WIDTH));
> +
> /*
> * Only P6 based Pentium M need to re-unmask the apic vector but
> * it doesn't hurt other P6 variants.
> */
> apic_write(APIC_LVTPC, APIC_DM_NMI);
> }
> - write_watchdog_counter(NULL);
> + else if ( nmi_perfctr_msr == MSR_K7_PERFCTR0 )
> + {
> + rdmsrl(MSR_K7_PERFCTR0, msr_content);
> + watchdog_tick = !(msr_content & (1ULL << K7_EVENT_WIDTH));
> + }
> +
> + if ( watchdog_tick )
> + {
> + unsigned int *this_alert_counter = &this_cpu(alert_counter);
> + unsigned int *this_last_irq_sums = &this_cpu(last_irq_sums);
> +
> + write_watchdog_counter(NULL);
> +
> + if ( (*this_last_irq_sums == sum) && watchdog_enabled() )
> + {
> + /*
> + * Ayiee, looks like this CPU is stuck ... wait for the timeout
> + * before doing the oops ...
> + */
> + ++*this_alert_counter;
> + if ( *this_alert_counter == opt_watchdog_timeout * nmi_hz )
> + {
> + console_force_unlock();
> + printk("Watchdog timer detects that CPU%d is stuck!\n",
> + smp_processor_id());
> + fatal_trap(TRAP_nmi, regs);
> + }
> + }
> + else
> + {
> + *this_last_irq_sums = sum;
> + this_alert_counter = 0;
> + }
> + }
> }
> +
> + return watchdog_tick;
> }
>
> /*
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index 71be2ae..38f42fa 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -3312,8 +3312,8 @@ void do_nmi(const struct cpu_user_regs *regs)
> if ( nmi_callback(regs, cpu) )
> return;
>
> - if ( nmi_watchdog )
> - nmi_watchdog_tick(regs);
> + if ( nmi_watchdog && nmi_watchdog_tick(regs) )
> + return;
>
> /* Only the BSP gets external NMIs from the system. */
> if ( cpu == 0 )
> @@ -3323,7 +3323,7 @@ void do_nmi(const struct cpu_user_regs *regs)
> pci_serr_error(regs);
> if ( reason & 0x40 )
> io_check_error(regs);
> - if ( !(reason & 0xc0) && !nmi_watchdog )
> + if ( !(reason & 0xc0) )
> unknown_nmi_error(regs, reason);
> }
> }
> diff --git a/xen/include/asm-x86/apic.h b/xen/include/asm-x86/apic.h
> index 5d7623f..6697245 100644
> --- a/xen/include/asm-x86/apic.h
> +++ b/xen/include/asm-x86/apic.h
> @@ -206,7 +206,7 @@ extern void release_lapic_nmi(void);
> extern void self_nmi(void);
> extern void disable_timer_nmi_watchdog(void);
> extern void enable_timer_nmi_watchdog(void);
> -extern void nmi_watchdog_tick (const struct cpu_user_regs *regs);
> +extern bool_t nmi_watchdog_tick (const struct cpu_user_regs *regs);
> extern int APIC_init_uniprocessor (void);
> extern void disable_APIC_timer(void);
> extern void enable_APIC_timer(void);
next prev parent reply other threads:[~2014-08-26 10:17 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-08-26 10:10 [PATCH] x86/nmi: Make external NMI injection reliably crash the host Ross Lagerwall
2014-08-26 10:17 ` Andrew Cooper [this message]
2014-08-26 12:59 ` Jan Beulich
2014-08-26 15:26 ` Ross Lagerwall
2014-08-26 15:38 ` Jan Beulich
2014-08-27 11:14 ` Ross Lagerwall
2014-08-27 12:04 ` Jan Beulich
2014-08-26 16:06 ` Don Slutz
2014-08-26 16:51 ` Andrew Cooper
2014-08-26 21:51 ` Don Slutz
2014-08-26 23:01 ` Andrew Cooper
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=53FC5ECD.9070205@citrix.com \
--to=andrew.cooper3@citrix.com \
--cc=xen-devel@lists.xen.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.