All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
To: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>, Borislav Petkov <bp@amd64.org>,
	linux-kernel@vger.kernel.org, "Huang,
	Ying" <ying.huang@intel.com>, Avi Kivity <avi@redhat.com>
Subject: Re: [PATCH 10/10] MCE: Add Action-Required support
Date: Fri, 10 Jun 2011 17:06:16 +0900	[thread overview]
Message-ID: <4DF1D078.7070207@jp.fujitsu.com> (raw)
In-Reply-To: <4df13d6327307cf53@agluck-desktop.sc.intel.com>

(2011/06/10 6:38), Luck, Tony wrote:
> From: Tony Luck <tony.luck@intel.com>
> 
> Implement core MCA recovery. This is used for errors
> that happen in the current execution context.
> 
> The kernel has to first pass the error information
> to a function running on the current process stack.
> This is done using task_return_notifier_register().
> 
> Just handle errors in user mode for now. Later we
> may be able to handle some kernel cases (e.g. when
> kernel is in copy_*_user())
> 
> Based on some original code by Andi Kleen.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> ---
>  arch/x86/kernel/cpu/mcheck/mce-severity.c |   35 +++++++-
>  arch/x86/kernel/cpu/mcheck/mce.c          |  118 +++++++++++++++++++++++++++--
>  2 files changed, 142 insertions(+), 11 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> index 352d16a..fe8a28c 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> @@ -13,6 +13,7 @@
>  #include <linux/seq_file.h>
>  #include <linux/init.h>
>  #include <linux/debugfs.h>
> +#include <linux/module.h>
>  #include <asm/mce.h>
>  
>  #include "mce-internal.h"
> @@ -54,6 +55,9 @@ static struct severity {
>  	{ .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r }
>  #define MASK(x, y, s, m, r...) \
>  	{ .mask = x, .result = y, SEV(s), .msg = m, ## r }
> +#define ARMASK(x, y, s, m, r...) \
> +	{ .mcgmask = MCG_STATUS_RIPV, .mcgres = 0, \
> +	  .mask = x, .result = y, SEV(s), .msg = m, ## r }
>  #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
>  #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
>  #define MCACOD 0xffff
> @@ -67,7 +71,7 @@ static struct severity {
>  	MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC,
>  		"Neither restart nor error IP"),
>  	MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP",
> -		KERNEL),
> +		KERNEL, NOSER),
>  	BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER),
>  
>  	/* ignore OVER for UCNA */
> @@ -77,10 +81,16 @@ static struct severity {
>  	     "Illegal combination (UCNA with AR=1)", SER),
>  	MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER),
>  
> -	/* AR add known MCACODs here */
>  	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC,
>  	     "Action required with lost events", SER),
> -	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR, PANIC,
> +
> +	/* known AR MCACODs: */
> +	ARMASK(MCI_UC_SAR|MCI_STATUS_OVER|0xffff, MCI_UC_SAR|0x134, AR,
> +	     "Action required: data load error", SER),
> +	ARMASK(MCI_UC_SAR|MCI_STATUS_OVER|0xffff, MCI_UC_SAR|0x150, AR,
> +	     "Action required: instruction fetch error", SER),
> +
> +	ARMASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR, PANIC,
>  	     "Action required; unknown MCACOD", SER),
>  
>  	/* known AO MCACODs: */
> @@ -89,6 +99,7 @@ static struct severity {
>  	MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO,
>  	     "Action optional: last level cache writeback error", SER),
>  
> +
>  	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME,
>  	     "Action optional unknown MCACOD", SER),
>  	MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME,
> @@ -110,6 +121,17 @@ static int error_context(struct mce *m)
>  	return IN_KERNEL;
>  }
>  
> +static int kernel_ar_recoverable(struct mce *m, int tolerant)
> +{
> +	if (tolerant >= 2)
> +		return MCE_AR_SEVERITY;
> +	if (!(m->mcgstatus & MCG_STATUS_EIPV) || !m->ip)
> +		return MCE_PANIC_SEVERITY;
> +	if (search_exception_tables(m->ip))
> +		return MCE_AR_SEVERITY;
> +	return MCE_PANIC_SEVERITY;
> +}
> +

You said "Just handle errors in user mode for now." but ...?

>  int mce_severity(struct mce *a, int tolerant, char **msg)
>  {
>  	enum context ctx = error_context(a);
> @@ -129,9 +151,12 @@ int mce_severity(struct mce *a, int tolerant, char **msg)
>  		if (msg)
>  			*msg = s->msg;
>  		s->covered = 1;
> -		if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) {
> -			if (panic_on_oops || tolerant < 1)
> +		if (ctx == IN_KERNEL) {
> +			if (s->sev >= MCE_UC_SEVERITY &&
> +				(panic_on_oops || tolerant < 1))
>  				return MCE_PANIC_SEVERITY;
> +			if (s->sev == MCE_AR_SEVERITY)
> +				return kernel_ar_recoverable(a, tolerant);
>  		}
>  		return s->sev;
>  	}
> diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
> index 9c72245..a7a8c53 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce.c
> @@ -80,6 +80,20 @@ static void mce_do_notify(struct user_return_notifier *urn);
>  static DEFINE_PER_CPU(struct mce_notify, mce_notify);
>  
>  /*
> + * Task return notifiers are used for "action required"
> + * recovery of tasks - i.e. we prevent return to the task
> + * that encountered the machine check, but we ensure that
> + * we process the error in task context.
> + */
> +struct task_notify {
> +	struct user_return_notifier urn;
> +	unsigned	long pfn;
> +	atomic_t	inuse;
> +};
> +static struct task_notify task_notifier[NR_CPUS];
> +static void mce_do_task(struct user_return_notifier *urn);
> +
> +/*
>   * Tolerant levels:
>   *   0: always panic on uncorrected errors, log corrected errors
>   *   1: panic or SIGBUS on uncorrected errors, log corrected errors
> @@ -975,6 +989,84 @@ static void mce_clear_state(unsigned long *toclear)
>  	}
>  }
>  
> +/* Stub when hwpoison is not compiled in */
> +int __attribute__((weak)) __memory_failure(unsigned long pfn, int vector,
> +					   int precount)
> +{
> +	return -1;
> +}
> +
> +/*
> + * Uncorrected error for current process.
> + */
> +static void mce_action_required(struct mce *m, char *msg, struct pt_regs *regs)
> +{
> +	int	i;
> +
> +	if (!mce_usable_address(m))
> +		mce_panic("No address for Action-Required Machine Check",
> +			  m, msg);
> +	if (!(m->mcgstatus & MCG_STATUS_EIPV))
> +		mce_panic("No EIPV for Action-Required Machine Check",
> +			  m, msg);

When can this happen?
Why not create new severity {PANIC, "Action Required: but No EIPV", ...}
in severity table?

> +
> +	for (i = 0; i < NR_CPUS; i++)
> +		if (!atomic_cmpxchg(&task_notifier[i].inuse, 0, 1))
> +			break;
> +	if (i == NR_CPUS)
> +		mce_panic("Too many concurrent errors", m, msg);
> +
> +	task_notifier[i].urn.on_user_return = mce_do_task;
> +	task_notifier[i].pfn = m->addr >> PAGE_SHIFT;
> +	task_return_notifier_register(&task_notifier[i].urn);
> +}
> +
> +#undef pr_fmt
> +#define pr_fmt(x) "MCE: %s:%d " x "\n", current->comm, current->pid
> +#define PADDR(x) ((u64)(x) << PAGE_SHIFT)
> +
> +/*
> + * No successfull recovery. Make sure at least that there's
> + * a SIGBUS.
> + */
> +static void ar_fallback(struct task_struct *me, unsigned long pfn)
> +{
> +	if (signal_pending(me) && sigismember(&me->pending.signal, SIGBUS))
> +		return;

Is it safe for _AR if SIGBUS is pending but blocked?
I think force_sig() is reasonable for such situation.

> +
> +	/*
> +	 * For some reason hwpoison wasn't able to send a proper
> +	 * SIGBUS.  Send a fallback signal. Unfortunately we don't
> +	 * know the virtual address here, so can't tell the program
> +	 * details.
> +	 */
> +	force_sig(SIGBUS, me);
> +	pr_err("Killed due to action-required memory corruption");
> +}
> +
> +/*
> + * Handle action-required on the process stack.  hwpoison does the
> + * bulk of the work and with some luck might even be able to fix the
> + * problem.
> + *
> + * Logic changes here should be reflected in kernel_ar_recoverable().
> + */
> +static void handle_action_required(unsigned long pfn)
> +{
> +	struct task_struct *me = current;
> +
> +	pr_err("Uncorrected hardware memory error in user-access at %llx",
> +	       PADDR(pfn));
> +	if (__memory_failure(pfn, MCE_VECTOR, 0) < 0) {
> +		pr_err("Memory error not recovered");
> +		ar_fallback(me, pfn);
> +	} else
> +		pr_err("Memory error recovered");
> +}
> +
> +#undef pr_fmt
> +#define pr_fmt(x) x
> +
>  /*
>   * The actual machine check handler. This only handles real
>   * exceptions when something got corrupted coming in through int 18.
> @@ -1086,12 +1178,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
>  			continue;
>  		}
>  
> -		/*
> -		 * Kill on action required.
> -		 */
> -		if (severity == MCE_AR_SEVERITY)
> -			kill_it = 1;
> -
>  		mce_read_aux(&m, i);
>  
>  		/*
> @@ -1136,6 +1222,15 @@ void do_machine_check(struct pt_regs *regs, long error_code)
>  		mce_panic("Fatal machine check on current CPU", &m, msg);
>  
>  	/*
> +	 * Do recovery in current process if needed. This has to be delayed
> +	 * until we're back on the process stack.
> +	 */
> +	if (worst == MCE_AR_SEVERITY) {
> +		mce_action_required(&m, msg, regs);

Comprehensible name would be appreciated, e.g.:
		mce_request_dpc_for_action_required(pfn);

And if we cannot request context for recovery, it is better to suppress
trailing attempts, e.g. before mce_end():

	if (!no_way_out && severity == MCE_AR_SEVERITY) {
		err = mce_request_dpc_for_action_required(pfn);
		if (err) {
			atomic_inc(&global_nwo);
			severity = MCE_PANIC_SEVERITY;	/* escalated */
		}
	}


Thanks,
H.Seto

> +		kill_it = 0;
> +	}
> +
> +	/*
>  	 * If the error seems to be unrecoverable, something should be
>  	 * done.  Try to kill as little as possible.  If we can kill just
>  	 * one task, do that.  If the user has set the tolerance very
> @@ -1194,6 +1289,17 @@ static void mce_do_notify(struct user_return_notifier *urn)
>  	mce_process_ring();
>  }
>  
> +static void mce_do_task(struct user_return_notifier *urn)
> +{
> +	struct task_notify *np = container_of(urn, struct task_notify, urn);
> +	unsigned long pfn = np->pfn;
> +
> +	task_return_notifier_unregister(urn);
> +	atomic_set(&np->inuse, 0);
> +
> +	handle_action_required(pfn);
> +}
> +
>  static void mce_process_work(struct work_struct *dummy)
>  {
>  	mce_process_ring();


  reply	other threads:[~2011-06-10  8:09 UTC|newest]

Thread overview: 65+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-06-09 21:25 [RFC] reworked machine check recovery patches Luck, Tony
2011-06-09 21:29 ` [PATCH 01/10] MCE: fixes for mce severity table Luck, Tony
2011-06-09 21:30 ` [PATCH 02/10] MCE: save most severe error information Luck, Tony
2011-06-10  8:06   ` Hidetoshi Seto
2011-06-10 18:08     ` Tony Luck
2011-06-09 21:31 ` [PATCH 03/10] MCE: introduce mce_gather_info() Luck, Tony
2011-06-09 21:32 ` [PATCH 04/10] MCE: Move ADDR/MISC reading code into common function Luck, Tony
2011-06-10  9:33   ` Borislav Petkov
2011-06-10 18:17     ` Tony Luck
2011-06-09 21:33 ` [PATCH 05/10] MCE: Mask out address mask bits below address granuality Luck, Tony
2011-06-10  8:07   ` Hidetoshi Seto
2011-06-10  9:46     ` Borislav Petkov
2011-06-10 19:06     ` Tony Luck
2011-06-11  0:12       ` Andi Kleen
2011-06-10  9:42   ` Borislav Petkov
2011-06-10 19:09     ` Tony Luck
2011-06-09 21:34 ` [PATCH 06/10] HWPOISON: Handle hwpoison in current process Luck, Tony
2011-06-10  8:07   ` Hidetoshi Seto
2011-06-10 20:36     ` Tony Luck
2011-06-09 21:35 ` [PATCH 07/10] MCE: replace mce.c use of TIF_MCE_NOTIFY with user_return_notifier Luck, Tony
2011-06-10  8:08   ` Hidetoshi Seto
2011-06-10 20:42     ` Tony Luck
2011-06-11 10:24       ` Borislav Petkov
2011-06-12  8:31       ` Avi Kivity
2011-06-12  8:29   ` Avi Kivity
2011-06-12 10:24     ` Borislav Petkov
2011-06-12 10:30       ` Avi Kivity
2011-06-12 13:53         ` Borislav Petkov
2011-06-09 21:36 ` [PATCH 08/10] NOTIFIER: Take over TIF_MCE_NOTIFY and implement task return notifier Luck, Tony
2011-06-12 22:38   ` Borislav Petkov
2011-06-13  5:31     ` Tony Luck
2011-06-13  7:59       ` Avi Kivity
2011-06-13  9:55         ` Borislav Petkov
2011-06-13 11:40           ` Avi Kivity
2011-06-13 12:40             ` Borislav Petkov
2011-06-13 12:47               ` Avi Kivity
2011-06-13 15:12                 ` Borislav Petkov
2011-06-13 16:31                   ` Avi Kivity
2011-06-13 17:13                     ` Tony Luck
2011-06-14  2:50                       ` Hidetoshi Seto
2011-06-14  2:51                         ` [PATCH 1/2] x86, mce: introduce mce_memory_failure_process Hidetoshi Seto
2011-06-14  2:53                         ` [PATCH 2/2] x86, mce: rework use of TIF_MCE_NOTIFY Hidetoshi Seto
2011-06-14 18:02                           ` Tony Luck
2011-06-14 18:28                             ` Tony Luck
2011-06-15  1:29                               ` Hidetoshi Seto
2011-06-15  2:10                                 ` Tony Luck
2011-06-15  3:17                                   ` Hidetoshi Seto
2011-06-14  3:09                         ` [PATCH 08/10] NOTIFIER: Take over TIF_MCE_NOTIFY and implement task return notifier Tony Luck
2011-06-14 11:40                       ` Avi Kivity
2011-06-14 13:33                         ` Borislav Petkov
2011-06-14 13:43                           ` Avi Kivity
2011-06-14 17:13                             ` Luck, Tony
2011-06-15  8:51                               ` Avi Kivity
2011-06-14 16:59                         ` Luck, Tony
2011-06-15  8:52                           ` Avi Kivity
2011-06-13 16:43               ` Tony Luck
2011-06-09 21:37 ` [PATCH 09/10] MCE: run through processors with more severe problems first Luck, Tony
2011-06-10  8:09   ` Hidetoshi Seto
2011-06-10 20:49     ` Tony Luck
2011-06-13 22:03       ` Tony Luck
2011-06-14  1:27         ` Hidetoshi Seto
2011-06-14  3:04           ` Tony Luck
2011-06-09 21:38 ` [PATCH 10/10] MCE: Add Action-Required support Luck, Tony
2011-06-10  8:06   ` Hidetoshi Seto [this message]
2011-06-10 21:00     ` Tony Luck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4DF1D078.7070207@jp.fujitsu.com \
    --to=seto.hidetoshi@jp.fujitsu.com \
    --cc=avi@redhat.com \
    --cc=bp@amd64.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=tony.luck@intel.com \
    --cc=ying.huang@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.