All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andrew Cooper <andrew.cooper3@citrix.com>
To: Ross Lagerwall <ross.lagerwall@citrix.com>,
	Xen-devel <xen-devel@lists.xen.org>
Cc: Keir Fraser <keir@xen.org>, Jan Beulich <jbeulich@suse.com>
Subject: Re: [PATCH v3] x86/NMI: Allow processing unknown NMIs when watchdog is enabled
Date: Thu, 28 Aug 2014 15:07:24 +0100	[thread overview]
Message-ID: <53FF379C.8070701@citrix.com> (raw)
In-Reply-To: <1409156728-4995-1-git-send-email-ross.lagerwall@citrix.com>

On 27/08/14 17:25, Ross Lagerwall wrote:
> Change NMI processing so that if watchdog=force is passed on the
> command-line and the NMI is not caused by a perf counter overflow (i.e.
> likely not a watchdog "tick"), the NMI is handled by the unknown NMI
> handler.
>
> This allows injection of NMIs from IPMI controllers that don't set the
> IOCK/SERR bits to trigger the unknown NMI handler rather than be
> ignored.
>
> Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com>
> CC: Keir Fraser <keir@xen.org>
> CC: Jan Beulich <jbeulich@suse.com>
> ---
>
> Changes since V2:
> Fix processing when watchdog is turned off.
>
> Changes since V1:
> Most of the original behavior is now restored. Now, only if
> watchdog=force is given on the command-line and the NMI is not a
> watchdog tick (i.e. the perf counter has not overflowed) will the
> unknown NMI handler be invoked.
>
>  docs/misc/xen-command-line.markdown |  6 ++++--
>  xen/arch/x86/nmi.c                  | 39 +++++++++++++++++++++++++++++++++++--
>  xen/arch/x86/traps.c                |  7 ++++---
>  xen/include/asm-x86/apic.h          |  2 +-
>  xen/include/asm-x86/nmi.h           |  3 +++
>  5 files changed, 49 insertions(+), 8 deletions(-)
>
> diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
> index a8cab59..1cbcca4 100644
> --- a/docs/misc/xen-command-line.markdown
> +++ b/docs/misc/xen-command-line.markdown
> @@ -1245,12 +1245,14 @@ As the BTS virtualisation is not 100% safe and because of the nehalem quirk
>  don't use the vpmu flag on production systems with Intel cpus!
>  
>  ### watchdog
> -> `= <boolean>`
> +> `= force | <boolean>`
>  
>  > Default: `false`
>  
>  Run an NMI watchdog on each processor.  If a processor is stuck for
> -longer than the **watchdog\_timeout**, a panic occurs.
> +longer than the **watchdog\_timeout**, a panic occurs.  When `force` is
> +specified, in addition to running an NMI watchdog on each processor,
> +unknown NMIs will still be processed.
>  
>  ### watchdog\_timeout
>  > `= <integer>`
> diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c
> index c4427a6..873427f 100644
> --- a/xen/arch/x86/nmi.c
> +++ b/xen/arch/x86/nmi.c
> @@ -43,7 +43,18 @@ static DEFINE_PER_CPU(unsigned int, nmi_timer_ticks);
>  
>  /* opt_watchdog: If true, run a watchdog NMI on each processor. */
>  bool_t __initdata opt_watchdog = 0;
> -boolean_param("watchdog", opt_watchdog);
> +
> +/* watchdog_force: If true, process unknown NMIs when running the watchdog. */
> +bool_t watchdog_force = 0;
> +
> +static void __init parse_watchdog(char * s)
> +{
> +    opt_watchdog = !!parse_bool(s);

A lot of code in Xen gets the use of parse_bool() wrong.  In this case,
opt_watchdog will be set if garbage is passed in the parameter, which is
a change in behaviour from before.

I recommend:

switch ( parse_bool(s) )
{
case 0:
    break;
default:
    if ( !strcmp(s, "force") )
        watchdog_force = 1;
    /* fall through */
case 1:
    opt_watchdog = 1;
}

~Andrew

> +
> +    if ( !strcmp(s, "force") )
> +        watchdog_force = 1;
> +}
> +custom_param("watchdog", parse_watchdog);
>  
>  /* opt_watchdog_timeout: Number of seconds to wait before panic. */
>  static unsigned int opt_watchdog_timeout = 5;
> @@ -82,6 +93,7 @@ int nmi_active;
>  #define K7_EVNTSEL_USR		(1 << 16)
>  #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
>  #define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
> +#define K7_EVENT_WIDTH          32
>  
>  #define P6_EVNTSEL0_ENABLE	(1 << 22)
>  #define P6_EVNTSEL_INT		(1 << 20)
> @@ -89,10 +101,12 @@ int nmi_active;
>  #define P6_EVNTSEL_USR		(1 << 16)
>  #define P6_EVENT_CPU_CLOCKS_NOT_HALTED	 0x79
>  #define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c
> +#define P6_EVENT_WIDTH          32
>  
>  #define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
>  #define P4_CCCR_OVF_PMI0	(1<<26)
>  #define P4_CCCR_OVF_PMI1	(1<<27)
> +#define P4_CCCR_OVF		(1<<31)
>  #define P4_CCCR_THRESHOLD(N)	((N)<<20)
>  #define P4_CCCR_COMPLEMENT	(1<<19)
>  #define P4_CCCR_COMPARE		(1<<18)
> @@ -432,8 +446,10 @@ int __init watchdog_setup(void)
>      return 0;
>  }
>  
> -void nmi_watchdog_tick(const struct cpu_user_regs *regs)
> +/* Returns false if this was not a watchdog NMI, true otherwise */
> +bool_t nmi_watchdog_tick(const struct cpu_user_regs *regs)
>  {
> +    bool_t watchdog_tick = 1;
>      unsigned int sum = this_cpu(nmi_timer_ticks);
>  
>      if ( (this_cpu(last_irq_sums) == sum) && watchdog_enabled() )
> @@ -459,8 +475,15 @@ void nmi_watchdog_tick(const struct cpu_user_regs *regs)
>  
>      if ( nmi_perfctr_msr )
>      {
> +        uint64_t msr_content;
> +
> +        /* Work out if this is a watchdog tick by checking for overflow. */
>          if ( nmi_perfctr_msr == MSR_P4_IQ_PERFCTR0 )
>          {
> +            rdmsrl(MSR_P4_IQ_CCCR0, msr_content);
> +            if ( !(msr_content & P4_CCCR_OVF) )
> +                watchdog_tick = 0;
> +
>              /*
>               * P4 quirks:
>               * - An overflown perfctr will assert its interrupt
> @@ -473,14 +496,26 @@ void nmi_watchdog_tick(const struct cpu_user_regs *regs)
>          }
>          else if ( nmi_perfctr_msr == MSR_P6_PERFCTR0 )
>          {
> +            rdmsrl(MSR_P6_PERFCTR0, msr_content);
> +            if ( msr_content & (1ULL << P6_EVENT_WIDTH) )
> +                watchdog_tick = 0;
> +
>              /*
>               * Only P6 based Pentium M need to re-unmask the apic vector but
>               * it doesn't hurt other P6 variants.
>               */
>              apic_write(APIC_LVTPC, APIC_DM_NMI);
>          }
> +        else if ( nmi_perfctr_msr == MSR_K7_PERFCTR0 )
> +        {
> +            rdmsrl(MSR_K7_PERFCTR0, msr_content);
> +            if ( msr_content & (1ULL << K7_EVENT_WIDTH) )
> +                watchdog_tick = 0;
> +        }
>          write_watchdog_counter(NULL);
>      }
> +
> +    return watchdog_tick;
>  }
>  
>  /*
> diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
> index 71be2ae..7f5306f 100644
> --- a/xen/arch/x86/traps.c
> +++ b/xen/arch/x86/traps.c
> @@ -3306,14 +3306,15 @@ void do_nmi(const struct cpu_user_regs *regs)
>  {
>      unsigned int cpu = smp_processor_id();
>      unsigned char reason;
> +    bool_t handle_unknown = 0;
>  
>      ++nmi_count(cpu);
>  
>      if ( nmi_callback(regs, cpu) )
>          return;
>  
> -    if ( nmi_watchdog )
> -        nmi_watchdog_tick(regs);
> +    if ( !nmi_watchdog || (!nmi_watchdog_tick(regs) && watchdog_force) )
> +        handle_unknown = 1;
>  
>      /* Only the BSP gets external NMIs from the system. */
>      if ( cpu == 0 )
> @@ -3323,7 +3324,7 @@ void do_nmi(const struct cpu_user_regs *regs)
>              pci_serr_error(regs);
>          if ( reason & 0x40 )
>              io_check_error(regs);
> -        if ( !(reason & 0xc0) && !nmi_watchdog )
> +        if ( !(reason & 0xc0) && handle_unknown )
>              unknown_nmi_error(regs, reason);
>      }
>  }
> diff --git a/xen/include/asm-x86/apic.h b/xen/include/asm-x86/apic.h
> index 5d7623f..6697245 100644
> --- a/xen/include/asm-x86/apic.h
> +++ b/xen/include/asm-x86/apic.h
> @@ -206,7 +206,7 @@ extern void release_lapic_nmi(void);
>  extern void self_nmi(void);
>  extern void disable_timer_nmi_watchdog(void);
>  extern void enable_timer_nmi_watchdog(void);
> -extern void nmi_watchdog_tick (const struct cpu_user_regs *regs);
> +extern bool_t nmi_watchdog_tick (const struct cpu_user_regs *regs);
>  extern int APIC_init_uniprocessor (void);
>  extern void disable_APIC_timer(void);
>  extern void enable_APIC_timer(void);
> diff --git a/xen/include/asm-x86/nmi.h b/xen/include/asm-x86/nmi.h
> index 4504fe1..2c92db9 100644
> --- a/xen/include/asm-x86/nmi.h
> +++ b/xen/include/asm-x86/nmi.h
> @@ -8,6 +8,9 @@ struct cpu_user_regs;
>  
>  /* Watchdog boolean from the command line */
>  extern bool_t opt_watchdog;
> +
> +/* Watchdog force parameter from the command line */
> +extern bool_t watchdog_force;
>   
>  typedef int (*nmi_callback_t)(const struct cpu_user_regs *regs, int cpu);
>   

  reply	other threads:[~2014-08-28 14:07 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-27 16:25 [PATCH v3] x86/NMI: Allow processing unknown NMIs when watchdog is enabled Ross Lagerwall
2014-08-28 14:07 ` Andrew Cooper [this message]
2014-09-01 13:27   ` Ross Lagerwall
2014-09-01 14:51     ` Jan Beulich
2014-09-03 12:24       ` [PATCH] x86/NMI: allow passing just "watchdog" again Jan Beulich
2014-09-03 12:32         ` Andrew Cooper

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=53FF379C.8070701@citrix.com \
    --to=andrew.cooper3@citrix.com \
    --cc=jbeulich@suse.com \
    --cc=keir@xen.org \
    --cc=ross.lagerwall@citrix.com \
    --cc=xen-devel@lists.xen.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.