All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aravind Gopalakrishnan <aravind.gopalakrishnan@amd.com>
To: Chen Yucong <slaoub@gmail.com>, <bp@alien8.de>, <tony.luck@intel.com>
Cc: <ak@linux.intel.com>, <linux-edac@vger.kernel.org>,
	<linux-kernel@vger.kernel.org>
Subject: Re: [PATCH v3 1/2] x86, mce, severity: extend the mce_severity mechanism to handle UCNA/DEFERRED error
Date: Mon, 10 Nov 2014 16:06:00 -0600	[thread overview]
Message-ID: <546136C8.5060104@amd.com> (raw)
In-Reply-To: <1415410821-15063-2-git-send-email-slaoub@gmail.com>

On 11/7/2014 7:40 PM, Chen Yucong wrote:
> Until now, the mce_severity mechanism can only identify the severity
> of UCNA error as MCE_KEEP_SEVERITY. Meanwhile, it is not able to filter
> out DEFERRED error for ADM platform.
>
> This patch aims to extend the mce_severity mechanism for handling
> UCNA/DEFERRED error. In order to do this, the patch introduces a new
> severity level - MCE_UCNA/DEFERRED_SEVERITY.
>
> In addition, mce_severity is specific to machine check exception,
> and it will check MCIP/EIPV/RIPV bits. In order to use mce_severity
> mechanism in non-exception context, the patch also introduces a new
> argument (is_excp) for mce_severity. `is_excp' is used to explicitly
> specify the calling context of mce_severity.
>
> Signed-off-by: Chen Yucong <slaoub@gmail.com>
> ---
>   arch/x86/include/asm/mce.h                |    4 ++++
>   arch/x86/kernel/cpu/mcheck/mce-internal.h |    4 +++-
>   arch/x86/kernel/cpu/mcheck/mce-severity.c |   21 ++++++++++++++++-----
>   arch/x86/kernel/cpu/mcheck/mce.c          |   14 ++++++++------
>   drivers/edac/mce_amd.h                    |    3 ---
>   5 files changed, 31 insertions(+), 15 deletions(-)
>
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 276392f..51b26e89 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -34,6 +34,10 @@
>   #define MCI_STATUS_S	 (1ULL<<56)  /* Signaled machine check */
>   #define MCI_STATUS_AR	 (1ULL<<55)  /* Action required */
>   
> +/* AMD-specific bits */
> +#define MCI_STATUS_DEFERRED	(1ULL<<44)  /* declare an uncorrected error */
> +#define MCI_STATUS_POISON	(1ULL<<43)  /* access poisonous data */
> +
>   /*
>    * Note that the full MCACOD field of IA32_MCi_STATUS MSR is
>    * bits 15:0.  But bit 12 is the 'F' bit, defined for corrected
> diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
> index 09edd0b..10b4690 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
> +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
> @@ -3,6 +3,8 @@
>   
>   enum severity_level {
>   	MCE_NO_SEVERITY,
> +	MCE_DEFERRED_SEVERITY,
> +	MCE_UCNA_SEVERITY = MCE_DEFERRED_SEVERITY,
>   	MCE_KEEP_SEVERITY,
>   	MCE_SOME_SEVERITY,
>   	MCE_AO_SEVERITY,
> @@ -21,7 +23,7 @@ struct mce_bank {
>   	char			attrname[ATTR_LEN];	/* attribute name */
>   };
>   
> -int mce_severity(struct mce *a, int tolerant, char **msg);
> +int mce_severity(struct mce *a, int tolerant, char **msg, bool is_excp);
>   struct dentry *mce_get_debugfs_dir(void);
>   
>   extern struct mce_bank *mce_banks;
> diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> index c370e1c..c61feb3 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
> @@ -31,6 +31,7 @@
>   
>   enum context { IN_KERNEL = 1, IN_USER = 2 };
>   enum ser { SER_REQUIRED = 1, NO_SER = 2 };
> +enum exception { EXCP_CONTEXT = 1, NO_EXCP = 2 };
>   
>   static struct severity {
>   	u64 mask;
> @@ -40,6 +41,7 @@ static struct severity {
>   	unsigned char mcgres;
>   	unsigned char ser;
>   	unsigned char context;
> +	unsigned char excp;
>   	unsigned char covered;
>   	char *msg;
>   } severities[] = {
> @@ -48,6 +50,8 @@ static struct severity {
>   #define  USER		.context = IN_USER
>   #define  SER		.ser = SER_REQUIRED
>   #define  NOSER		.ser = NO_SER
> +#define  EXCP		.excp = EXCP_CONTEXT
> +#define  NOEXCP		.excp = NO_EXCP
>   #define  BITCLR(x)	.mask = x, .result = 0
>   #define  BITSET(x)	.mask = x, .result = x
>   #define  MCGMASK(x, y)	.mcgmask = x, .mcgres = y
> @@ -71,16 +75,20 @@ static struct severity {
>   	/* When MCIP is not set something is very confused */
>   	MCESEV(
>   		PANIC, "MCIP not set in MCA handler",
> -		MCGMASK(MCG_STATUS_MCIP, 0)
> +		EXCP, MCGMASK(MCG_STATUS_MCIP, 0)
>   		),
>   	/* Neither return not error IP -- no chance to recover -> PANIC */
>   	MCESEV(
>   		PANIC, "Neither restart nor error IP",
> -		MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
> +		EXCP, MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0)
>   		),
>   	MCESEV(
>   		PANIC, "In kernel and no restart IP",
> -		KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
> +		EXCP, KERNEL, MCGMASK(MCG_STATUS_RIPV, 0)
> +		),
> +	MCESEV(
> +		DEFERRED, "Deferred error",
> +		NOSER, MASK(MCI_STATUS_UC|MCI_STATUS_DEFERRED|MCI_STATUS_POISON, MCI_STATUS_DEFERRED)
>   		),

We don't need MCI_STATUS_POISON in the MASK() here, as a deferred
error is indicated by {UC = 0, Deferred = 1}.
(Older docs might be unclear on that.)

And the commit message still says "ADM" instead of "AMD" :)

- Aravind.

  reply	other threads:[~2014-11-10 22:06 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-11-08  1:40 [PATCH v3 0/2]RAS: add the support for handling UCNA/DEFERRED error Chen Yucong
2014-11-08  1:40 ` [PATCH v3 1/2] x86, mce, severity: extend the the mce_severity mechanism to handle " Chen Yucong
2014-11-10 22:06   ` Aravind Gopalakrishnan [this message]
2014-11-10 22:17     ` Borislav Petkov
2014-11-10 23:03       ` Aravind Gopalakrishnan
2014-11-10 23:32       ` Luck, Tony
2014-11-11  8:56         ` Borislav Petkov
2014-11-11 18:44           ` Luck, Tony
2014-11-12  1:03             ` Chen Yucong
2014-11-12 18:28               ` Luck, Tony
2014-11-08  1:40 ` [PATCH v3 2/2] x86, mce: support memory error recovery for both UCNA and Deferred error in machine_check_poll Chen Yucong
2014-11-10 19:06   ` Borislav Petkov
2014-11-10 21:37     ` Borislav Petkov
2014-11-10 21:44       ` Luck, Tony
2014-11-10 21:47         ` Borislav Petkov
2014-11-10 16:42 ` [PATCH v3 0/2]RAS: add the support for handling UCNA/DEFERRED error Borislav Petkov
2014-11-10 18:47   ` Luck, Tony

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=546136C8.5060104@amd.com \
    --to=aravind.gopalakrishnan@amd.com \
    --cc=ak@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=slaoub@gmail.com \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.