All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mauro Carvalho Chehab <mchehab@redhat.com>
To: Borislav Petkov <bp@amd64.org>
Cc: Tony Luck <tony.luck@intel.com>, Ingo Molnar <mingo@elte.hu>,
	EDAC devel <linux-edac@vger.kernel.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Borislav Petkov <borislav.petkov@amd.com>
Subject: Re: [PATCH 4/4] EDAC: Convert AMD EDAC pieces to use RAS printk buffer
Date: Mon, 05 Mar 2012 08:43:59 -0300	[thread overview]
Message-ID: <4F54A6FF.50502@redhat.com> (raw)
In-Reply-To: <20120305110441.GC1070@aftab>

Em 05-03-2012 08:04, Borislav Petkov escreveu:
> On Fri, Mar 02, 2012 at 11:52:59AM -0300, Mauro Carvalho Chehab wrote:
> 
> [..]
> 
>> 	- the "ras_agent" helper functions is used only for amd64_edac. There's
>> no reason for use it elsewhere;
> 
> [..]
> 
> That can be fixed very easily with the patch below ontop of the current
> series (I'll add a proper version of it to the series later). This way,
> one does ras_printk() and builds up the message and then, when its done,
> calls into the tracepoint like this:
> 
> 	trace_mce_record(ras_get_decoded_err(), m);
> 
> where m is struct mce.

It is still adding an amd64-specific code inside the core, as no other driver will
use the amd64 "ras_agent".

Instead of doing that, you should do the opposite: call the EDAC report function
passing ras_get_decoded_err() as the string parameter of the function. This way,
the EDAC core will work the same way with amd64 and with the other drivers.

> 
> The result is:
> 
> [Hardware Error]: CPU:0 MC4_STATUS[-|UE|-|PCC|AddrV|UECC]: 0xb607a10009080a23 MC4_ADDR: 0x0000000955647380
> [Hardware Error]: Northbridge Error (node 0): DRAM ECC error detected on the NB.
> [Hardware Error]: EDAC MC2: UE page 0x955647, offset 0x380, grain 0, row 2, labels ":": amd64_edac
> 
> ^^^ This line comes from the respective edac driver.
> 
> [Hardware Error]: cache level: L3/GEN, mem/io: MEM, mem-tx: WR, part-proc: RES (no timeout)
> [Hardware Error]: CPU: 0, MCGc/s: 0/0, MC4: b607a10009080a23, ADDR/MISC: 0000000955647380/dead57ac1ba0babe, RIP: 00:<0000000000000000>, TSC: 0, TIME: 0, SOCKET: 0
> 
> 
>> non MCA drivers should also generate tracepoints;
> 
> yes, they should define a tracepoint which fits the hardware error
> reporting scheme they're using.

If nobody objects, I'll add my changes to linux-next, as it was tested on most
systems. I'll remove the MCE-specific tracepoint from my code, keeping the trace
there for the other stuff.

> 
> --
> diff --git a/arch/x86/include/asm/ras.h b/arch/x86/include/asm/ras.h
> index 92199af2ab7b..b51838514259 100644
> --- a/arch/x86/include/asm/ras.h
> +++ b/arch/x86/include/asm/ras.h
> @@ -6,8 +6,8 @@
>  extern bool ras_agent;
>  
>  #define PR_EMERG	BIT(0)
> -#define PR_WARNING	BIT(1)
> -#define PR_CONT		BIT(2)
> +#define PR_WARNING	BIT(4)
> +#define PR_CONT		BIT(8)
>  
>  extern const char *ras_get_decoded_err(void);
>  extern void ras_printk(unsigned long flags, const char *fmt, ...);
> diff --git a/arch/x86/ras/ras.c b/arch/x86/ras/ras.c
> index 5edfe30034d3..868d732c6cd4 100644
> --- a/arch/x86/ras/ras.c
> +++ b/arch/x86/ras/ras.c
> @@ -52,7 +52,7 @@ void ras_printk(unsigned long flags, const char *fmt, ...)
>  	if (!ras_agent) {
>  		if (flags & PR_EMERG)
>  			pr_emerg("%s", buf);
> -		if (flags & PR_WARNING)
> +		else if (flags & PR_WARNING)
>  			pr_warning("%s", buf);
>  		else if (flags & PR_CONT)
>  			pr_cont("%s", buf);
> diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
> index 29e153c57e33..c0f31f953c70 100644
> --- a/drivers/edac/amd64_edac.c
> +++ b/drivers/edac/amd64_edac.c
> @@ -1902,10 +1902,7 @@ static void amd64_handle_ce(struct mem_ctl_info *mci, struct mce *m)
>  	sys_addr = get_error_address(m);
>  	syndrome = extract_syndrome(m->status);
>  
> -	if (ras_agent)
> -		ras_printk(PR_EMERG, "ERR_ADDR: 0x%llx", sys_addr);
> -	else
> -		amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);
> +	amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);
>  
>  	pvt->ops->map_sysaddr_to_csrow(mci, sys_addr, syndrome);
>  }
> diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
> index e48ab3108ad8..62a32b3fa660 100644
> --- a/drivers/edac/edac_core.h
> +++ b/drivers/edac/edac_core.h
> @@ -49,8 +49,17 @@
>  #define edac_printk(level, prefix, fmt, arg...) \
>  	printk(level "EDAC " prefix ": " fmt, ##arg)
>  
> -#define edac_mc_printk(mci, level, fmt, arg...) \
> -	printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg)
> +#define edac_mc_printk(mci, level, fmt, arg...)					\
> +({										\
> +	if (ras_agent) {							\
> +		unsigned pr_lvl = BIT((unsigned)(level[1] - '0'));		\
> +										\
> +		ras_printk(pr_lvl, HW_ERR "EDAC MC%d" fmt,			\
> +			   mci->mc_idx, ##arg);					\
> +	}									\
> +	else									\
> +		printk(level "EDAC MC%d: " fmt, mci->mc_idx, ##arg);		\
> +})
>  
>  #define edac_mc_chipset_printk(mci, level, prefix, fmt, arg...) \
>  	printk(level "EDAC " prefix " MC%d: " fmt, mci->mc_idx, ##arg)
> diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
> index 3b3db477b5d0..446853a303c4 100644
> --- a/drivers/edac/edac_mc.c
> +++ b/drivers/edac/edac_mc.c
> @@ -703,11 +703,7 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci,
>  		return;
>  	}
>  
> -	if (edac_mc_get_log_ce()) {
> -		if (ras_agent)
> -			ras_printk(PR_CONT, ", row: %d, channel: %d\n",
> -				   row, channel);
> -		else
> +	if (edac_mc_get_log_ce())
>  			/* FIXME - put in DIMM location */
>  			edac_mc_printk(mci, KERN_WARNING,
>  					"CE page 0x%lx, offset 0x%lx, grain %d,"
> @@ -718,7 +714,6 @@ void edac_mc_handle_ce(struct mem_ctl_info *mci,
>  					row, channel,
>  					mci->csrows[row].channels[channel].label,
>  					msg);
> -	}
>  
>  	mci->ce_count++;
>  	mci->csrows[row].ce_count++;
> @@ -790,16 +785,12 @@ void edac_mc_handle_ue(struct mem_ctl_info *mci,
>  		pos += chars;
>  	}
>  
> -	if (edac_mc_get_log_ue()) {
> -		if (ras_agent)
> -			ras_printk(PR_CONT, "row: %d\n", row);
> -		else
> +	if (edac_mc_get_log_ue())
>  			edac_mc_printk(mci, KERN_EMERG,
>  				       "UE page 0x%lx, offset 0x%lx, grain %d,"
>  				       " row %d, labels \"%s\": %s\n",
>  				       page_frame_number, offset_in_page,
>  				       mci->csrows[row].grain, row, labels, msg);
> -	}
>  
>  	if (edac_mc_get_panic_on_ue())
>  		panic("EDAC MC%d: UE page 0x%lx, offset 0x%lx, grain %d, "
> 


  reply	other threads:[~2012-03-05 11:44 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-02 14:25 [RFC -v2 PATCH 0/3] RAS: Use MCE tracepoint for decoded MCEs Borislav Petkov
2012-03-02 14:25 ` [PATCH 1/4] mce: Slim up struct mce Borislav Petkov
2012-03-02 17:47   ` Luck, Tony
2012-03-03  7:37     ` Ingo Molnar
2012-03-05  9:17       ` Borislav Petkov
2012-03-02 14:25 ` [PATCH 2/4] mce: Add a msg string to the MCE tracepoint Borislav Petkov
2012-03-02 14:25 ` [PATCH 3/4] x86, RAS: Add a decoded msg buffer Borislav Petkov
2012-03-02 14:25 ` [PATCH 4/4] EDAC: Convert AMD EDAC pieces to use RAS printk buffer Borislav Petkov
2012-03-02 14:52   ` Mauro Carvalho Chehab
2012-03-05 11:04     ` Borislav Petkov
2012-03-05 11:43       ` Mauro Carvalho Chehab [this message]
2012-03-05 12:44         ` Borislav Petkov
2012-03-05 13:35           ` Mauro Carvalho Chehab
2012-03-05 14:13             ` Borislav Petkov
2012-03-05 14:58               ` Mauro Carvalho Chehab
2012-03-05 22:00                 ` [PATCHv5] EDAC core changes in order to properly report errors from all types of memory controllers Mauro Carvalho Chehab
2012-03-05 23:23                   ` Borislav Petkov
2012-03-06 11:31                     ` Mauro Carvalho Chehab
2012-03-06 12:16                       ` Borislav Petkov
2012-03-07  0:20                         ` [PATCHv7] " Mauro Carvalho Chehab
2012-03-07  8:42                           ` Borislav Petkov
2012-03-07 11:36                             ` Mauro Carvalho Chehab
2012-03-07 12:06                               ` Borislav Petkov
2012-03-07 12:13                                 ` Mauro Carvalho Chehab
2012-03-02 14:41 ` [RFC -v2 PATCH 0/3] RAS: Use MCE tracepoint for decoded MCEs Mauro Carvalho Chehab
2012-03-02 14:48   ` Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4F54A6FF.50502@redhat.com \
    --to=mchehab@redhat.com \
    --cc=borislav.petkov@amd.com \
    --cc=bp@amd64.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.