public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
To: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>, Borislav Petkov <bp@amd64.org>,
	linux-kernel@vger.kernel.org, "Huang,
	Ying" <ying.huang@intel.com>, Avi Kivity <avi@redhat.com>
Subject: Re: [PATCH 10/10] MCE: Add Action-Required support
Date: Fri, 10 Jun 2011 17:06:16 +0900	[thread overview]
Message-ID: <4DF1D078.7070207@jp.fujitsu.com> (raw)
In-Reply-To: <4df13d6327307cf53@agluck-desktop.sc.intel.com>

(2011/06/10 6:38), Luck, Tony wrote:
> From: Tony Luck <tony.luck@intel.com>
> 
> Implement core MCA recovery. This is used for errors
> that happen in the current execution context.
> 
> The kernel has to first pass the error information
> to a function running on the current process stack.
> This is done using task_return_notifier_register().
> 
> Just handle errors in user mode for now. Later we
> may be able to handle some kernel cases (e.g. when
> kernel is in copy_*_user())
> 
> Based on some original code by Andi Kleen.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  arch/x86/kernel/cpu/mcheck/mce-severity.c |   35 +++++++-
>  arch/x86/kernel/cpu/mcheck/mce.c          |  118 +++++++++++++++++++++++++++--
>  2 files changed, 142 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> index 352d16a..fe8a28c 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> @@ -13,6 +13,7 @@
>  #include <linux/seq_file.h>
>  #include <linux/init.h>
>  #include <linux/debugfs.h>
> +#include <linux/module.h>
>  #include <asm/mce.h>
>  
>  #include "mce-internal.h"
> @@ -54,6 +55,9 @@ static struct severity {
>  	{ .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
>  #define MASK(x, y, s, m, r...) \
>  	{ .mask = x, .result = y, SEV(s), .msg = m, ## r }
> +#define ARMASK(x, y, s, m, r...) \
> +	{ .mcgmask = MCG_STATUS_RIPV, .mcgres = 0, \
> +	  .mask = x, .result = y, SEV(s), .msg = m, ## r }
>  #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
>  #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
>  #define MCACOD 0xffff
> @@ -67,7 +71,7 @@ static struct severity {
>  	MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
>  		"Neither restart nor error IP"),
>  	MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
> -		KERNEL),
> +		KERNEL, NOSER),
>  	BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
>  
>  	/* ignore OVER for UCNA */
> @@ -77,10 +81,16 @@ static struct severity {
>  	     "Illegal combination (UCNA with AR=1)", SER),
>  	MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
>  
> -	/* AR add known MCACODs here */
>  	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
>  	     "Action required with lost events", SER),
> -	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR, PANIC,
> +
> +	/* known AR MCACODs: */
> +	ARMASK(MCI_UC_SAR|MCI_STATUS_OVER|0xffff, MCI_UC_SAR|0x134, AR,
> +	     "Action required: data load error", SER),
> +	ARMASK(MCI_UC_SAR|MCI_STATUS_OVER|0xffff, MCI_UC_SAR|0x150, AR,
> +	     "Action required: instruction fetch error", SER),
> +
> +	ARMASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR, PANIC,
>  	     "Action required; unknown MCACOD", SER),
>  
>  	/* known AO MCACODs: */
> @@ -89,6 +99,7 @@ static struct severity {
>  	MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
>  	     "Action optional: last level cache writeback error", SER),
>  
> +
>  	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
>  	     "Action optional unknown MCACOD", SER),
>  	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
> @@ -110,6 +121,17 @@ static int error_context(struct mce *m)
>  	return IN_KERNEL;
>  }
>  
> +static int kernel_ar_recoverable(struct mce *m, int tolerant)
> +{
> +	if (tolerant >= 2)
> +		return MCE_AR_SEVERITY;
> +	if (!(m->mcgstatus & MCG_STATUS_EIPV) || !m->ip)
> +		return MCE_PANIC_SEVERITY;
> +	if (search_exception_tables(m->ip))
> +		return MCE_AR_SEVERITY;
> +	return MCE_PANIC_SEVERITY;
> +}
> +

You said "Just handle errors in user mode for now." but ...?

>  int mce_severity(struct mce *a, int tolerant, char **msg)
>  {
>  	enum context ctx = error_context(a);
> @@ -129,9 +151,12 @@ int mce_severity(struct mce *a, int tolerant, char **msg)
>  		if (msg)
>  			*msg = s->msg;
>  		s->covered = 1;
> -		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
> -			if (panic_on_oops || tolerant < 1)
> +		if (ctx == IN_KERNEL) {
> +			if (s->sev >= MCE_UC_SEVERITY &&
> +				(panic_on_oops || tolerant < 1))
>  				return MCE_PANIC_SEVERITY;
> +			if (s->sev == MCE_AR_SEVERITY)
> +				return kernel_ar_recoverable(a, tolerant);
>  		}
>  		return s->sev;
>  	}
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 9c72245..a7a8c53 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -80,6 +80,20 @@ static void mce_do_notify(struct user_return_notifier *urn);
>  static DEFINE_PER_CPU(struct mce_notify, mce_notify);
>  
>  /*
> + * Task return notifiers are used for "action required"
> + * recovery of tasks - i.e. we prevent return to the task
> + * that encountered the machine check, but we ensure that
> + * we process the error in task context.
> + */
> +struct task_notify {
> +	struct user_return_notifier urn;
> +	unsigned	long pfn;
> +	atomic_t	inuse;
> +};
> +static struct task_notify task_notifier[NR_CPUS];
> +static void mce_do_task(struct user_return_notifier *urn);
> +
> +/*
>   * Tolerant levels:
>   *   0: always panic on uncorrected errors, log corrected errors
>   *   1: panic or SIGBUS on uncorrected errors, log corrected errors
> @@ -975,6 +989,84 @@ static void mce_clear_state(unsigned long *toclear)
>  	}
>  }
>  
> +/* Stub when hwpoison is not compiled in */
> +int __attribute__((weak)) __memory_failure(unsigned long pfn, int vector,
> +					   int precount)
> +{
> +	return -1;
> +}
> +
> +/*
> + * Uncorrected error for current process.
> + */
> +static void mce_action_required(struct mce *m, char *msg, struct pt_regs *regs)
> +{
> +	int	i;
> +
> +	if (!mce_usable_address(m))
> +		mce_panic("No address for Action-Required Machine Check",
> +			  m, msg);
> +	if (!(m->mcgstatus & MCG_STATUS_EIPV))
> +		mce_panic("No EIPV for Action-Required Machine Check",
> +			  m, msg);

When can this happen?
Why not create new severity {PANIC, "Action Required: but No EIPV", ...}
in severity table?

> +
> +	for (i = 0; i < NR_CPUS; i++)
> +		if (!atomic_cmpxchg(&task_notifier[i].inuse, 0, 1))
> +			break;
> +	if (i == NR_CPUS)
> +		mce_panic("Too many concurrent errors", m, msg);
> +
> +	task_notifier[i].urn.on_user_return = mce_do_task;
> +	task_notifier[i].pfn = m->addr >> PAGE_SHIFT;
> +	task_return_notifier_register(&task_notifier[i].urn);
> +}
> +
> +#undef pr_fmt
> +#define pr_fmt(x) "MCE: %s:%d " x "\n", current->comm, current->pid
> +#define PADDR(x) ((u64)(x) << PAGE_SHIFT)
> +
> +/*
> + * No successfull recovery. Make sure at least that there's
> + * a SIGBUS.
> + */
> +static void ar_fallback(struct task_struct *me, unsigned long pfn)
> +{
> +	if (signal_pending(me) && sigismember(&me->pending.signal, SIGBUS))
> +		return;

Is it safe for _AR if SIGBUS is pending but blocked?
I think force_sig() is reasonable for such situation.

> +
> +	/*
> +	 * For some reason hwpoison wasn't able to send a proper
> +	 * SIGBUS.  Send a fallback signal. Unfortunately we don't
> +	 * know the virtual address here, so can't tell the program
> +	 * details.
> +	 */
> +	force_sig(SIGBUS, me);
> +	pr_err("Killed due to action-required memory corruption");
> +}
> +
> +/*
> + * Handle action-required on the process stack.  hwpoison does the
> + * bulk of the work and with some luck might even be able to fix the
> + * problem.
> + *
> + * Logic changes here should be reflected in kernel_ar_recoverable().
> + */
> +static void handle_action_required(unsigned long pfn)
> +{
> +	struct task_struct *me = current;
> +
> +	pr_err("Uncorrected hardware memory error in user-access at %llx",
> +	       PADDR(pfn));
> +	if (__memory_failure(pfn, MCE_VECTOR, 0) < 0) {
> +		pr_err("Memory error not recovered");
> +		ar_fallback(me, pfn);
> +	} else
> +		pr_err("Memory error recovered");
> +}
> +
> +#undef pr_fmt
> +#define pr_fmt(x) x
> +
>  /*
>   * The actual machine check handler. This only handles real
>   * exceptions when something got corrupted coming in through int 18.
> @@ -1086,12 +1178,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
>  			continue;
>  		}
>  
> -		/*
> -		 * Kill on action required.
> -		 */
> -		if (severity == MCE_AR_SEVERITY)
> -			kill_it = 1;
> -
>  		mce_read_aux(&m, i);
>  
>  		/*
> @@ -1136,6 +1222,15 @@ void do_machine_check(struct pt_regs *regs, long error_code)
>  		mce_panic("Fatal machine check on current CPU", &m, msg);
>  
>  	/*
> +	 * Do recovery in current process if needed. This has to be delayed
> +	 * until we're back on the process stack.
> +	 */
> +	if (worst == MCE_AR_SEVERITY) {
> +		mce_action_required(&m, msg, regs);

Comprehensible name would be appreciated, e.g.:
		mce_request_dpc_for_action_required(pfn);

And if we cannot request context for recovery, it is better to suppress
trailing attempts, e.g. before mce_end():

	if (!no_way_out && severity == MCE_AR_SEVERITY) {
		err = mce_request_dpc_for_action_required(pfn);
		if (err) {
			atomic_inc(&global_nwo);
			severity = MCE_PANIC_SEVERITY;	/* escalated */
		}
	}


Thanks,
H.Seto

> +		kill_it = 0;
> +	}
> +
> +	/*
>  	 * If the error seems to be unrecoverable, something should be
>  	 * done.  Try to kill as little as possible.  If we can kill just
>  	 * one task, do that.  If the user has set the tolerance very
> @@ -1194,6 +1289,17 @@ static void mce_do_notify(struct user_return_notifier *urn)
>  	mce_process_ring();
>  }
>  
> +static void mce_do_task(struct user_return_notifier *urn)
> +{
> +	struct task_notify *np = container_of(urn, struct task_notify, urn);
> +	unsigned long pfn = np->pfn;
> +
> +	task_return_notifier_unregister(urn);
> +	atomic_set(&np->inuse, 0);
> +
> +	handle_action_required(pfn);
> +}
> +
>  static void mce_process_work(struct work_struct *dummy)
>  {
>  	mce_process_ring();


  reply	other threads:[~2011-06-10  8:09 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-09 21:25 [RFC] reworked machine check recovery patches Luck, Tony
2011-06-09 21:29 ` [PATCH 01/10] MCE: fixes for mce severity table Luck, Tony
2011-06-09 21:30 ` [PATCH 02/10] MCE: save most severe error information Luck, Tony
2011-06-10  8:06   ` Hidetoshi Seto
2011-06-10 18:08     ` Tony Luck
2011-06-09 21:31 ` [PATCH 03/10] MCE: introduce mce_gather_info() Luck, Tony
2011-06-09 21:32 ` [PATCH 04/10] MCE: Move ADDR/MISC reading code into common function Luck, Tony
2011-06-10  9:33   ` Borislav Petkov
2011-06-10 18:17     ` Tony Luck
2011-06-09 21:33 ` [PATCH 05/10] MCE: Mask out address mask bits below address granuality Luck, Tony
2011-06-10  8:07   ` Hidetoshi Seto
2011-06-10  9:46     ` Borislav Petkov
2011-06-10 19:06     ` Tony Luck
2011-06-11  0:12       ` Andi Kleen
2011-06-10  9:42   ` Borislav Petkov
2011-06-10 19:09     ` Tony Luck
2011-06-09 21:34 ` [PATCH 06/10] HWPOISON: Handle hwpoison in current process Luck, Tony
2011-06-10  8:07   ` Hidetoshi Seto
2011-06-10 20:36     ` Tony Luck
2011-06-09 21:35 ` [PATCH 07/10] MCE: replace mce.c use of TIF_MCE_NOTIFY with user_return_notifier Luck, Tony
2011-06-10  8:08   ` Hidetoshi Seto
2011-06-10 20:42     ` Tony Luck
2011-06-11 10:24       ` Borislav Petkov
2011-06-12  8:31       ` Avi Kivity
2011-06-12  8:29   ` Avi Kivity
2011-06-12 10:24     ` Borislav Petkov
2011-06-12 10:30       ` Avi Kivity
2011-06-12 13:53         ` Borislav Petkov
2011-06-09 21:36 ` [PATCH 08/10] NOTIFIER: Take over TIF_MCE_NOTIFY and implement task return notifier Luck, Tony
2011-06-12 22:38   ` Borislav Petkov
2011-06-13  5:31     ` Tony Luck
2011-06-13  7:59       ` Avi Kivity
2011-06-13  9:55         ` Borislav Petkov
2011-06-13 11:40           ` Avi Kivity
2011-06-13 12:40             ` Borislav Petkov
2011-06-13 12:47               ` Avi Kivity
2011-06-13 15:12                 ` Borislav Petkov
2011-06-13 16:31                   ` Avi Kivity
2011-06-13 17:13                     ` Tony Luck
2011-06-14  2:50                       ` Hidetoshi Seto
2011-06-14  2:51                         ` [PATCH 1/2] x86, mce: introduce mce_memory_failure_process Hidetoshi Seto
2011-06-14  2:53                         ` [PATCH 2/2] x86, mce: rework use of TIF_MCE_NOTIFY Hidetoshi Seto
2011-06-14 18:02                           ` Tony Luck
2011-06-14 18:28                             ` Tony Luck
2011-06-15  1:29                               ` Hidetoshi Seto
2011-06-15  2:10                                 ` Tony Luck
2011-06-15  3:17                                   ` Hidetoshi Seto
2011-06-14  3:09                         ` [PATCH 08/10] NOTIFIER: Take over TIF_MCE_NOTIFY and implement task return notifier Tony Luck
2011-06-14 11:40                       ` Avi Kivity
2011-06-14 13:33                         ` Borislav Petkov
2011-06-14 13:43                           ` Avi Kivity
2011-06-14 17:13                             ` Luck, Tony
2011-06-15  8:51                               ` Avi Kivity
2011-06-14 16:59                         ` Luck, Tony
2011-06-15  8:52                           ` Avi Kivity
2011-06-13 16:43               ` Tony Luck
2011-06-09 21:37 ` [PATCH 09/10] MCE: run through processors with more severe problems first Luck, Tony
2011-06-10  8:09   ` Hidetoshi Seto
2011-06-10 20:49     ` Tony Luck
2011-06-13 22:03       ` Tony Luck
2011-06-14  1:27         ` Hidetoshi Seto
2011-06-14  3:04           ` Tony Luck
2011-06-09 21:38 ` [PATCH 10/10] MCE: Add Action-Required support Luck, Tony
2011-06-10  8:06   ` Hidetoshi Seto [this message]
2011-06-10 21:00     ` Tony Luck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4DF1D078.7070207@jp.fujitsu.com \
    --to=seto.hidetoshi@jp.fujitsu.com \
    --cc=avi@redhat.com \
    --cc=bp@amd64.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=tony.luck@intel.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox