[PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
@ 2012-12-17 19:39 Jacob Shin
  2012-12-17 19:39 ` [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h Jacob Shin
  2012-12-17 19:57 ` [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well Joe Perches
  0 siblings, 2 replies; 25+ messages in thread
From: Jacob Shin @ 2012-12-17 19:39 UTC (permalink / raw)
  To: Borislav Petkov, Doug Thompson; +Cc: linux-edac, linux-kernel, Jacob Shin

Currently only AMD Family 15h processors have special handling for MC2
errors, since upcoming Family 16h will also need unique handling,
let's make MC2 handling part of amd_decoder_ops.

Signed-off-by: Jacob Shin <jacob.shin@amd.com>
---
 drivers/edac/mce_amd.c |   58 ++++++++++++++++++++++++++----------------------
 drivers/edac/mce_amd.h |    1 +
 2 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index ad63757..84320f9 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -399,12 +399,9 @@ static void decode_mc1_mce(struct mce *m)
 		pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
 }
 
-static void decode_mc2_mce(struct mce *m)
+static bool k8_mc2_mce(u16 ec, u8 xec)
 {
-	u16 ec = EC(m->status);
-	u8 xec = XEC(m->status, xec_mask);
-
-	pr_emerg(HW_ERR "MC2 Error");
+	bool ret = true;
 
 	if (xec == 0x1)
 		pr_cont(" in the write data buffers.\n");
@@ -429,24 +426,18 @@ static void decode_mc2_mce(struct mce *m)
 				pr_cont(": %s parity/ECC error during data "
 					"access from L2.\n", R4_MSG(ec));
 			else
-				goto wrong_mc2_mce;
+				ret = false;
 		} else
-			goto wrong_mc2_mce;
+			ret = false;
 	} else
-		goto wrong_mc2_mce;
-
-	return;
+		ret = false;
 
- wrong_mc2_mce:
-	pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
+	return ret;
 }
 
-static void decode_f15_mc2_mce(struct mce *m)
+static bool f15h_mc2_mce(u16 ec, u8 xec)
 {
-	u16 ec = EC(m->status);
-	u8 xec = XEC(m->status, xec_mask);
-
-	pr_emerg(HW_ERR "MC2 Error: ");
+	bool ret = true;
 
 	if (TLB_ERROR(ec)) {
 		if (xec == 0x0)
@@ -454,10 +445,10 @@ static void decode_f15_mc2_mce(struct mce *m)
 		else if (xec == 0x1)
 			pr_cont("Poison data provided for TLB fill.\n");
 		else
-			goto wrong_f15_mc2_mce;
+			ret = false;
 	} else if (BUS_ERROR(ec)) {
 		if (xec > 2)
-			goto wrong_f15_mc2_mce;
+			ret = false;
 
 		pr_cont("Error during attempted NB data read.\n");
 	} else if (MEM_ERROR(ec)) {
@@ -471,14 +462,24 @@ static void decode_f15_mc2_mce(struct mce *m)
 			break;
 
 		default:
-			goto wrong_f15_mc2_mce;
+			ret = false;
 		}
 	}
 
-	return;
+	return ret;
+}
 
- wrong_f15_mc2_mce:
-	pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
+static void decode_mc2_mce(struct mce *m)
+{
+	u16 ec = EC(m->status);
+	u8 xec = XEC(m->status, xec_mask);
+
+	pr_emerg(HW_ERR "MC2 Error: ");
+
+	if (fam_ops->mc2_mce(ec, xec))
+		;
+	else
+		pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
 }
 
 static void decode_mc3_mce(struct mce *m)
@@ -702,10 +703,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 		break;
 
 	case 2:
-		if (c->x86 == 0x15)
-			decode_f15_mc2_mce(m);
-		else
-			decode_mc2_mce(m);
+		decode_mc2_mce(m);
 		break;
 
 	case 3:
@@ -783,33 +781,39 @@ static int __init mce_amd_init(void)
 	case 0xf:
 		fam_ops->mc0_mce = k8_mc0_mce;
 		fam_ops->mc1_mce = k8_mc1_mce;
+		fam_ops->mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x10:
 		fam_ops->mc0_mce = f10h_mc0_mce;
 		fam_ops->mc1_mce = k8_mc1_mce;
+		fam_ops->mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x11:
 		fam_ops->mc0_mce = k8_mc0_mce;
 		fam_ops->mc1_mce = k8_mc1_mce;
+		fam_ops->mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x12:
 		fam_ops->mc0_mce = f12h_mc0_mce;
 		fam_ops->mc1_mce = k8_mc1_mce;
+		fam_ops->mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x14:
 		nb_err_cpumask  = 0x3;
 		fam_ops->mc0_mce = f14h_mc0_mce;
 		fam_ops->mc1_mce = f14h_mc1_mce;
+		fam_ops->mc2_mce = k8_mc2_mce;
 		break;
 
 	case 0x15:
 		xec_mask = 0x1f;
 		fam_ops->mc0_mce = f15h_mc0_mce;
 		fam_ops->mc1_mce = f15h_mc1_mce;
+		fam_ops->mc2_mce = f15h_mc2_mce;
 		break;
 
 	default:
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h
index 6796799..000f6e2 100644
--- a/drivers/edac/mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -78,6 +78,7 @@ extern const char * const ii_msgs[];
 struct amd_decoder_ops {
 	bool (*mc0_mce)(u16, u8);
 	bool (*mc1_mce)(u16, u8);
+	bool (*mc2_mce)(u16, u8);
 };
 
 void amd_report_gart_errors(bool);
-- 
1.7.9.5



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-17 19:39 [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well Jacob Shin
@ 2012-12-17 19:39 ` Jacob Shin
  2012-12-18 17:19   ` Borislav Petkov
  2012-12-17 19:57 ` [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well Joe Perches
  1 sibling, 1 reply; 25+ messages in thread
From: Jacob Shin @ 2012-12-17 19:39 UTC (permalink / raw)
  To: Borislav Petkov, Doug Thompson; +Cc: linux-edac, linux-kernel, Jacob Shin

Add MCE decoding logic for AMD Family 16h processors.

Signed-off-by: Jacob Shin <jacob.shin@amd.com>
---
 drivers/edac/mce_amd.c |  120 ++++++++++++++++++++++++++++++++++++++++++++++--
 drivers/edac/mce_amd.h |    6 +++
 2 files changed, 122 insertions(+), 4 deletions(-)

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index 84320f9..7d2d037 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -64,6 +64,10 @@ EXPORT_SYMBOL_GPL(to_msgs);
 const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
 EXPORT_SYMBOL_GPL(ii_msgs);
 
+/* internal error type */
+const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
+EXPORT_SYMBOL_GPL(uu_msgs);
+
 static const char * const f15h_mc1_mce_desc[] = {
 	"UC during a demand linefill from L2",
 	"Parity error during data load from IC",
@@ -275,6 +279,23 @@ static bool f15h_mc0_mce(u16 ec, u8 xec)
 	return ret;
 }
 
+static bool f16h_mc0_mce(u16 ec, u8 xec)
+{
+	u8 r4 = R4(ec);
+
+	if (MEM_ERROR(ec) && TT(ec) == TT_DATA && LL(ec) == LL_L1 &&
+	    (r4 == R4_DRD || r4 == R4_DWR)) {
+
+		pr_cont("%s parity error due to %s.\n",
+			(xec == 0x0    ? "Data" : "Tag"),
+			(r4  == R4_DRD ? "load" : "store"));
+
+		return true;
+	}
+
+	return f14h_mc0_mce(ec, xec);
+}
+
 static void decode_mc0_mce(struct mce *m)
 {
 	u16 ec = EC(m->status);
@@ -379,6 +400,36 @@ static bool f15h_mc1_mce(u16 ec, u8 xec)
 	return ret;
 }
 
+static bool f16h_mc1_mce(u16 ec, u8 xec)
+{
+	u8 r4    = R4(ec);
+	bool ret = true;
+
+	if (MEM_ERROR(ec)) {
+		if (TT(ec) != TT_INSTR)
+			ret = false;
+
+		else if (r4 == R4_IRD)
+			pr_cont("%s array parity error for a tag hit.\n",
+				(xec == 0x0 ? "Data" : "Tag"));
+
+		else if (r4 == R4_SNOOP)
+			pr_cont("Tag error during snoop/victimization.\n");
+
+		else if (xec == 0x0)
+			pr_cont("Tag parity error from victim castout.\n");
+
+		else if (xec == 0x2)
+			pr_cont("Microcode patch RAM parity error.\n");
+
+		else
+			ret = false;
+	} else
+		ret = false;
+
+	return ret;
+}
+
 static void decode_mc1_mce(struct mce *m)
 {
 	u16 ec = EC(m->status);
@@ -469,6 +520,48 @@ static bool f15h_mc2_mce(u16 ec, u8 xec)
 	return ret;
 }
 
+static bool f16h_mc2_mce(u16 ec, u8 xec)
+{
+	u8 r4    = R4(ec);
+	bool ret = true;
+
+	if (MEM_ERROR(ec) && TT(ec) == TT_GEN && LL(ec) == LL_L2) {
+		switch (xec) {
+		case 0x04 ... 0x05:
+			pr_cont("Parity error in %s.\n",
+				(r4 == R4_RD ? "IBUFF" : "OBUFF"));
+			break;
+
+		case 0x09 ... 0x0b:
+		case 0x0d ... 0x0f:
+			pr_cont("ECC error in L2 tag (%s).\n",
+				(r4 == R4_GEN   ? "BankReq" :
+				(r4 == R4_SNOOP ? "Probe"   : "Fill")));
+			break;
+
+		case 0x10 ... 0x1b:
+			pr_cont("ECC error in L2 data array (%s).\n",
+				(r4 == R4_RD    ? "Hit"  :
+				(r4 == R4_GEN   ? "Attr" :
+				(r4 == R4_EVICT ? "Vict" : "Fill"))));
+			break;
+
+		case 0x1c ... 0x1f:
+			pr_cont("Parity error in L2 attribute bits (%s).\n",
+				(r4 == R4_RD  ? "Hit"  :
+				(r4 == R4_GEN ? "Attr" : "Fill")));
+			break;
+
+		default:
+			ret = false;
+			break;
+		}
+	} else
+		ret = false;
+
+	return ret;
+}
+
 static void decode_mc2_mce(struct mce *m)
 {
 	u16 ec = EC(m->status);
@@ -548,7 +641,7 @@ static void decode_mc4_mce(struct mce *m)
 		return;
 
 	case 0x19:
-		if (boot_cpu_data.x86 == 0x15)
+		if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
 			pr_cont("Compute Unit Data Error.\n");
 		else
 			goto wrong_mc4_mce;
@@ -634,6 +727,10 @@ static void decode_mc6_mce(struct mce *m)
 
 static inline void amd_decode_err_code(u16 ec)
 {
+	if (INT_ERROR(ec)) {
+		pr_emerg(HW_ERR "internal: %s\n", LL_MSG(ec));
+		return;
+	}
 
 	pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
 
@@ -738,10 +835,18 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 		((m->status & MCI_STATUS_PCC)	? "PCC"	  : "-"),
 		((m->status & MCI_STATUS_ADDRV)	? "AddrV" : "-"));
 
-	if (c->x86 == 0x15)
-		pr_cont("|%s|%s",
+	if (c->x86 == 0x15 || c->x86 == 0x16) {
+		char coreid[16];
+
+		if (m->status & MCI_STATUS_COREIDV)
+			sprintf(coreid, "CoreIdV(Core%d)",
+				(int)ERR_CORE_ID(m->status));
+
+		pr_cont("|%s|%s|%s",
+			((m->status & MCI_STATUS_COREIDV)  ? coreid     : "-"),
 			((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
 			((m->status & MCI_STATUS_POISON)   ? "Poison"   : "-"));
+	}
 
 	/* do the two bits[14:13] together */
 	ecc = (m->status >> 45) & 0x3;
@@ -770,7 +875,7 @@ static int __init mce_amd_init(void)
 	if (c->x86_vendor != X86_VENDOR_AMD)
 		return 0;
 
-	if (c->x86 < 0xf || c->x86 > 0x15)
+	if (c->x86 < 0xf || c->x86 > 0x16)
 		return 0;
 
 	fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
@@ -816,6 +921,13 @@ static int __init mce_amd_init(void)
 		fam_ops->mc2_mce = f15h_mc2_mce;
 		break;
 
+	case 0x16:
+		xec_mask = 0x1f;
+		fam_ops->mc0_mce = f16h_mc0_mce;
+		fam_ops->mc1_mce = f16h_mc1_mce;
+		fam_ops->mc2_mce = f16h_mc2_mce;
+		break;
+
 	default:
 		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
 		kfree(fam_ops);
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h
index 000f6e2..af2d20d 100644
--- a/drivers/edac/mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -11,9 +11,12 @@
 #define LOW_SYNDROME(x)			(((x) >> 15) & 0xff)
 #define HIGH_SYNDROME(x)		(((x) >> 24) & 0xff)
 
+#define ERR_CORE_ID(x)			(((x) >> 32) & 0xf)
+
 #define TLB_ERROR(x)			(((x) & 0xFFF0) == 0x0010)
 #define MEM_ERROR(x)			(((x) & 0xFF00) == 0x0100)
 #define BUS_ERROR(x)			(((x) & 0xF800) == 0x0800)
+#define INT_ERROR(x)			(((x) & 0xF4FF) == 0x0400)
 
 #define TT(x)				(((x) >> 2) & 0x3)
 #define TT_MSG(x)			tt_msgs[TT(x)]
@@ -25,10 +28,13 @@
 #define TO_MSG(x)			to_msgs[TO(x)]
 #define PP(x)				(((x) >> 9) & 0x3)
 #define PP_MSG(x)			pp_msgs[PP(x)]
+#define UU(x)				(((x) >> 8) & 0x3)
+#define UU_MSG(x)			uu_msgs[UU(x)]
 
 #define R4(x)				(((x) >> 4) & 0xf)
 #define R4_MSG(x)			((R4(x) < 9) ?  rrrr_msgs[R4(x)] : "Wrong R4!")
 
+#define MCI_STATUS_COREIDV		BIT_64(56)
 #define MCI_STATUS_DEFERRED		BIT_64(44)
 #define MCI_STATUS_POISON		BIT_64(43)
 
-- 
1.7.9.5



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-17 19:39 ` [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h Jacob Shin
@ 2012-12-18 17:19   ` Borislav Petkov
  2012-12-18 17:30     ` Jacob Shin
  2012-12-18 18:24     ` Joe Perches
  0 siblings, 2 replies; 25+ messages in thread
From: Borislav Petkov @ 2012-12-18 17:19 UTC (permalink / raw)
  To: Jacob Shin; +Cc: Doug Thompson, linux-edac, linux-kernel

On Mon, Dec 17, 2012 at 01:39:48PM -0600, Jacob Shin wrote:
> Add MCE decoding logic for AMD Family 16h processors.
> 
> Signed-off-by: Jacob Shin <jacob.shin@amd.com>
> ---
>  drivers/edac/mce_amd.c |  120 ++++++++++++++++++++++++++++++++++++++++++++++--
>  drivers/edac/mce_amd.h |    6 +++
>  2 files changed, 122 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
> index 84320f9..7d2d037 100644
> --- a/drivers/edac/mce_amd.c
> +++ b/drivers/edac/mce_amd.c
> @@ -64,6 +64,10 @@ EXPORT_SYMBOL_GPL(to_msgs);
>  const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
>  EXPORT_SYMBOL_GPL(ii_msgs);
>  
> +/* internal error type */
> +const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
> +EXPORT_SYMBOL_GPL(uu_msgs);

Seems like those aren't used anywhere?

>  static const char * const f15h_mc1_mce_desc[] = {
>  	"UC during a demand linefill from L2",
>  	"Parity error during data load from IC",
> @@ -275,6 +279,23 @@ static bool f15h_mc0_mce(u16 ec, u8 xec)
>  	return ret;
>  }
>  
> +static bool f16h_mc0_mce(u16 ec, u8 xec)
> +{
> +	u8 r4 = R4(ec);
> +
> +	if (MEM_ERROR(ec) && TT(ec) == TT_DATA && LL(ec) == LL_L1 &&
> +	    (r4 == R4_DRD || r4 == R4_DWR)) {
> +
> +		pr_cont("%s parity error due to %s.\n",
> +			(xec == 0x0    ? "Data" : "Tag"),
> +			(r4  == R4_DRD ? "load" : "store"));
> +
> +		return true;
> +	}
> +
> +	return f14h_mc0_mce(ec, xec);

Looks like this could be merged with f14h_mc0_mce no? You can call the
function then cat_mc0_mce (for all the *cat cores) and assign it to
fam_ops->mc0_mce in the f14h and f16h case.

> +}
> +
>  static void decode_mc0_mce(struct mce *m)
>  {
>  	u16 ec = EC(m->status);
> @@ -379,6 +400,36 @@ static bool f15h_mc1_mce(u16 ec, u8 xec)
>  	return ret;
>  }
>  
> +static bool f16h_mc1_mce(u16 ec, u8 xec)
> +{
> +	u8 r4    = R4(ec);
> +	bool ret = true;
> +
> +	if (MEM_ERROR(ec)) {
> +		if (TT(ec) != TT_INSTR)
> +			ret = false;
> +
> +		else if (r4 == R4_IRD)
> +			pr_cont("%s array parity error for a tag hit.\n",
> +				(xec == 0x0 ? "Data" : "Tag"));
> +
> +		else if (r4 == R4_SNOOP)
> +			pr_cont("Tag error during snoop/victimization.\n");
> +
> +		else if (xec == 0x0)
> +			pr_cont("Tag parity error from victim castout.\n");
> +
> +		else if (xec == 0x2)
> +			pr_cont("Microcode patch RAM parity error.\n");

Also no need for a family-special function - just rename f14h_mc1_mce
to cat_mc1_mce() as above and add a special case like this as the last
else-branch of the if conditional there:

+		if (boot_cpu_data.x86 == 0x16) {
+			if (LL(ec) == LL_LG && xec == 2)
+				pr_cont("Microcode patch RAM parity error.\n");
+			else
+				pr_cont("IC Tag parity error from victim castout.\n");
+			return true;
+		}

> +
> +		else
> +			ret = false;
> +	} else
> +		ret = false;
> +
> +	return ret;
> +}
> +
>  static void decode_mc1_mce(struct mce *m)
>  {
>  	u16 ec = EC(m->status);
> @@ -469,6 +520,48 @@ static bool f15h_mc2_mce(u16 ec, u8 xec)
>  	return ret;
>  }
>  
> +static bool f16h_mc2_mce(u16 ec, u8 xec)
> +{
> +	u8 r4    = R4(ec);
> +	bool ret = true;
> +
> +	if (MEM_ERROR(ec) && TT(ec) == TT_GEN && LL(ec) == LL_L2) {

You can exit early here:

	if (!MEM_ERROR(ec))
		return false;

Also, no need to test for TT and LL - we're relying on the hardware here
and if those values are b0rked then we have a more serious problem.

> +		switch (xec) {
> +		case 0x04 ... 0x05:
> +			pr_cont("Parity error in %s.\n",
> +				(r4 == R4_RD ? "IBUFF" : "OBUFF"));

or
			pr_cont("%sBUFF parity error.\n", ((xec == 4) ? "I" : "O"));

> +			break;
> +
> +		case 0x09 ... 0x0b:
> +		case 0x0d ... 0x0f:
> +			pr_cont("ECC error in L2 tag (%s).\n",
> +				(r4 == R4_GEN   ? "BankReq" :
> +				(r4 == R4_SNOOP ? "Probe"   : "Fill")));
> +			break;
> +
> +		case 0x10 ... 0x1b:
> +			pr_cont("ECC error in L2 data array (%s).\n",
> +				(r4 == R4_RD    ? "Hit"  :
> +				(r4 == R4_GEN   ? "Attr" :
> +				(r4 == R4_EVICT ? "Vict" : "Fill"))));
> +			break;
> +
> +		case 0x1c ... 0x1f:
> +			pr_cont("Parity error in L2 attribute bits (%s).\n",
> +				(r4 == R4_RD  ? "Hit"  :
> +				(r4 == R4_GEN ? "Attr" : "Fill")));
> +			break;
> +
> +		default:
> +			ret = false;
> +			break;
> +		}
> +	} else
> +		ret = false;
> +
> +	return ret;
> +}
> +
>  static void decode_mc2_mce(struct mce *m)
>  {
>  	u16 ec = EC(m->status);
> @@ -548,7 +641,7 @@ static void decode_mc4_mce(struct mce *m)
>  		return;
>  
>  	case 0x19:
> -		if (boot_cpu_data.x86 == 0x15)
> +		if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
>  			pr_cont("Compute Unit Data Error.\n");
>  		else
>  			goto wrong_mc4_mce;
> @@ -634,6 +727,10 @@ static void decode_mc6_mce(struct mce *m)
>  
>  static inline void amd_decode_err_code(u16 ec)
>  {
> +	if (INT_ERROR(ec)) {
> +		pr_emerg(HW_ERR "internal: %s\n", LL_MSG(ec));
> +		return;
> +	}

Is this correct? I'm just confirming because I don't have the internal
info anymore.

Uuh, hold on, maybe those otherwise unused uu_msgs above were meant to
be used here instead of the LL_MSG? IOW,

		pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));

Right?

>  
>  	pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
>  
> @@ -738,10 +835,18 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
>  		((m->status & MCI_STATUS_PCC)	? "PCC"	  : "-"),
>  		((m->status & MCI_STATUS_ADDRV)	? "AddrV" : "-"));
>  
> -	if (c->x86 == 0x15)
> -		pr_cont("|%s|%s",
> +	if (c->x86 == 0x15 || c->x86 == 0x16) {
> +		char coreid[16];
> +
> +		if (m->status & MCI_STATUS_COREIDV)
> +			sprintf(coreid, "CoreIdV(Core%d)",
> +				(int)ERR_CORE_ID(m->status));

Uuh, no, this is probably dumping the core which detected the error.. No
need since we're dumping the core reporting the error anyway above. And
if that's mismatched for some reason, we're also dumping full MCi_STATUS
contents so you can decypher CoreId from there if needed.

Thanks.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-18 17:19   ` Borislav Petkov
@ 2012-12-18 17:30     ` Jacob Shin
  2012-12-18 18:09       ` Jacob Shin
  2012-12-18 18:24     ` Joe Perches
  1 sibling, 1 reply; 25+ messages in thread
From: Jacob Shin @ 2012-12-18 17:30 UTC (permalink / raw)
  To: Borislav Petkov, Doug Thompson, linux-edac, linux-kernel

On Tue, Dec 18, 2012 at 06:19:15PM +0100, Borislav Petkov wrote:
> On Mon, Dec 17, 2012 at 01:39:48PM -0600, Jacob Shin wrote:
> > Add MCE decoding logic for AMD Family 16h processors.
> > 
> > Signed-off-by: Jacob Shin <jacob.shin@amd.com>
> > ---
> >  drivers/edac/mce_amd.c |  120 ++++++++++++++++++++++++++++++++++++++++++++++--
> >  drivers/edac/mce_amd.h |    6 +++
> >  2 files changed, 122 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
> > index 84320f9..7d2d037 100644
> > --- a/drivers/edac/mce_amd.c
> > +++ b/drivers/edac/mce_amd.c
> > @@ -64,6 +64,10 @@ EXPORT_SYMBOL_GPL(to_msgs);
> >  const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
> >  EXPORT_SYMBOL_GPL(ii_msgs);
> >  
> > +/* internal error type */
> > +const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
> > +EXPORT_SYMBOL_GPL(uu_msgs);
> 
> Seems like those aren't used anywhere?
> 
> >  static const char * const f15h_mc1_mce_desc[] = {
> >  	"UC during a demand linefill from L2",
> >  	"Parity error during data load from IC",
> > @@ -275,6 +279,23 @@ static bool f15h_mc0_mce(u16 ec, u8 xec)
> >  	return ret;
> >  }
> >  
> > +static bool f16h_mc0_mce(u16 ec, u8 xec)
> > +{
> > +	u8 r4 = R4(ec);
> > +
> > +	if (MEM_ERROR(ec) && TT(ec) == TT_DATA && LL(ec) == LL_L1 &&
> > +	    (r4 == R4_DRD || r4 == R4_DWR)) {
> > +
> > +		pr_cont("%s parity error due to %s.\n",
> > +			(xec == 0x0    ? "Data" : "Tag"),
> > +			(r4  == R4_DRD ? "load" : "store"));
> > +
> > +		return true;
> > +	}
> > +
> > +	return f14h_mc0_mce(ec, xec);
> 
> Looks like this could be merged with f14h_mc0_mce no? You can call the
> function then cat_mc0_mce (for all the *cat cores) and assign it to
> fam_ops->mc0_mce in the f14h and f16h case.

Okay

> 
> > +}
> > +
> >  static void decode_mc0_mce(struct mce *m)
> >  {
> >  	u16 ec = EC(m->status);
> > @@ -379,6 +400,36 @@ static bool f15h_mc1_mce(u16 ec, u8 xec)
> >  	return ret;
> >  }
> >  
> > +static bool f16h_mc1_mce(u16 ec, u8 xec)
> > +{
> > +	u8 r4    = R4(ec);
> > +	bool ret = true;
> > +
> > +	if (MEM_ERROR(ec)) {
> > +		if (TT(ec) != TT_INSTR)
> > +			ret = false;
> > +
> > +		else if (r4 == R4_IRD)
> > +			pr_cont("%s array parity error for a tag hit.\n",
> > +				(xec == 0x0 ? "Data" : "Tag"));
> > +
> > +		else if (r4 == R4_SNOOP)
> > +			pr_cont("Tag error during snoop/victimization.\n");
> > +
> > +		else if (xec == 0x0)
> > +			pr_cont("Tag parity error from victim castout.\n");
> > +
> > +		else if (xec == 0x2)
> > +			pr_cont("Microcode patch RAM parity error.\n");
> 
> Also no need for a family-special function - just rename f14h_mc1_mce
> to cat_mc1_mce() as above and add a special case like this as the last
> else-branch of the if conditional there:

Okay

> 
> +		if (boot_cpu_data.x86 == 0x16) {
> +			if (LL(ec) == LL_LG && xec == 2)
> +				pr_cont("Microcode patch RAM parity error.\n");
> +			else
> +				pr_cont("IC Tag parity error from victim castout.\n");
> +			return true;
> +		}
> 
> > +
> > +		else
> > +			ret = false;
> > +	} else
> > +		ret = false;
> > +
> > +	return ret;
> > +}
> > +
> >  static void decode_mc1_mce(struct mce *m)
> >  {
> >  	u16 ec = EC(m->status);
> > @@ -469,6 +520,48 @@ static bool f15h_mc2_mce(u16 ec, u8 xec)
> >  	return ret;
> >  }
> >  
> > +static bool f16h_mc2_mce(u16 ec, u8 xec)
> > +{
> > +	u8 r4    = R4(ec);
> > +	bool ret = true;
> > +
> > +	if (MEM_ERROR(ec) && TT(ec) == TT_GEN && LL(ec) == LL_L2) {
> 
> You can exit early here:
> 
> 	if (!MEM_ERROR(ec))
> 		return false;
> 
> Also, no need to test for TT and LL - we're relying on the hardware here
> and if those values are b0rked then we have a more serious problem.

Okay

> 
> > +		switch (xec) {
> > +		case 0x04 ... 0x05:
> > +			pr_cont("Parity error in %s.\n",
> > +				(r4 == R4_RD ? "IBUFF" : "OBUFF"));
> 
> or
> 			pr_cont("%sBUFF parity error.\n", ((xec == 4) ? "I" : "O"));
> 
> > +			break;
> > +
> > +		case 0x09 ... 0x0b:
> > +		case 0x0d ... 0x0f:
> > +			pr_cont("ECC error in L2 tag (%s).\n",
> > +				(r4 == R4_GEN   ? "BankReq" :
> > +				(r4 == R4_SNOOP ? "Probe"   : "Fill")));
> > +			break;
> > +
> > +		case 0x10 ... 0x1b:
> > +			pr_cont("ECC error in L2 data array (%s).\n",
> > +				(r4 == R4_RD    ? "Hit"  :
> > +				(r4 == R4_GEN   ? "Attr" :
> > +				(r4 == R4_EVICT ? "Vict" : "Fill"))));
> > +			break;
> > +
> > +		case 0x1c ... 0x1f:
> > +			pr_cont("Parity error in L2 attribute bits (%s).\n",
> > +				(r4 == R4_RD  ? "Hit"  :
> > +				(r4 == R4_GEN ? "Attr" : "Fill")));
> > +			break;
> > +
> > +		default:
> > +			ret = false;
> > +			break;
> > +		}
> > +	} else
> > +		ret = false;
> > +
> > +	return ret;
> > +}
> > +
> >  static void decode_mc2_mce(struct mce *m)
> >  {
> >  	u16 ec = EC(m->status);
> > @@ -548,7 +641,7 @@ static void decode_mc4_mce(struct mce *m)
> >  		return;
> >  
> >  	case 0x19:
> > -		if (boot_cpu_data.x86 == 0x15)
> > +		if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
> >  			pr_cont("Compute Unit Data Error.\n");
> >  		else
> >  			goto wrong_mc4_mce;
> > @@ -634,6 +727,10 @@ static void decode_mc6_mce(struct mce *m)
> >  
> >  static inline void amd_decode_err_code(u16 ec)
> >  {
> > +	if (INT_ERROR(ec)) {
> > +		pr_emerg(HW_ERR "internal: %s\n", LL_MSG(ec));
> > +		return;
> > +	}
> 
> Is this correct? I'm just confirming because I don't have the internal
> info anymore.
> 
> Uuh, hold on, maybe those otherwise unused uu_msgs above were meant to
> be used here instead of the LL_MSG? IOW,

> 
> 		pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
> 
> Right?

Ah, yes thats right, sorry about the typo. It looks like:

Error Code           Error Code Type        Description
0000 01UU 0000 0000  Internal Unclassified  UU = Internal Error Type

And the UU encoding is as is in the mce_amd.h file

> 
> >  
> >  	pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
> >  
> > @@ -738,10 +835,18 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
> >  		((m->status & MCI_STATUS_PCC)	? "PCC"	  : "-"),
> >  		((m->status & MCI_STATUS_ADDRV)	? "AddrV" : "-"));
> >  
> > -	if (c->x86 == 0x15)
> > -		pr_cont("|%s|%s",
> > +	if (c->x86 == 0x15 || c->x86 == 0x16) {
> > +		char coreid[16];
> > +
> > +		if (m->status & MCI_STATUS_COREIDV)
> > +			sprintf(coreid, "CoreIdV(Core%d)",
> > +				(int)ERR_CORE_ID(m->status));
> 
> Uuh, no, this is probably dumping the core which detected the error.. No
> need since we're dumping the core reporting the error anyway above. And
> if that's mismatched for some reason, we're also dumping full MCi_STATUS
> contents so you can decypher CoreId from there if needed.

Okay, will take this out.

Thanks for the feedback! I'll spin a V2 and send it out later today.

-Jacob

> 
> Thanks.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> Sent from a fat crate under my desk. Formatting is fine.
> --
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-18 17:30     ` Jacob Shin
@ 2012-12-18 18:09       ` Jacob Shin
  2012-12-18 18:32         ` Borislav Petkov
  0 siblings, 1 reply; 25+ messages in thread
From: Jacob Shin @ 2012-12-18 18:09 UTC (permalink / raw)
  To: Borislav Petkov, Doug Thompson, linux-edac, linux-kernel

On Tue, Dec 18, 2012 at 11:30:24AM -0600, Jacob Shin wrote:
> On Tue, Dec 18, 2012 at 06:19:15PM +0100, Borislav Petkov wrote:
> > On Mon, Dec 17, 2012 at 01:39:48PM -0600, Jacob Shin wrote:

> > > +/* internal error type */
> > > +const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
> > > +EXPORT_SYMBOL_GPL(uu_msgs);
> > 
> > Seems like those aren't used anywhere?

> > > @@ -634,6 +727,10 @@ static void decode_mc6_mce(struct mce *m)
> > >  
> > >  static inline void amd_decode_err_code(u16 ec)
> > >  {
> > > +	if (INT_ERROR(ec)) {
> > > +		pr_emerg(HW_ERR "internal: %s\n", LL_MSG(ec));
> > > +		return;
> > > +	}
> > 
> > Is this correct? I'm just confirming because I don't have the internal
> > info anymore.
> > 
> > Uuh, hold on, maybe those otherwise unused uu_msgs above were meant to
> > be used here instead of the LL_MSG? IOW,
> 
> > 
> > 		pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
> > 
> > Right?
> 
> Ah, yes thats right, sorry about the typo. It looks like:
> 
> Error Code           Error Code Type        Description
> 0000 01UU 0000 0000  Internal Unclassified  UU = Internal Error Type
> 
> And the UU encoding is as is in the mce_amd.h file

I think I meant to say as in uu_msgs[] above. HWA stands for
"Hardware Assertion" ..


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-18 18:09       ` Jacob Shin
@ 2012-12-18 18:32         ` Borislav Petkov
  0 siblings, 0 replies; 25+ messages in thread
From: Borislav Petkov @ 2012-12-18 18:32 UTC (permalink / raw)
  To: Jacob Shin; +Cc: Doug Thompson, linux-edac, linux-kernel

On Tue, Dec 18, 2012 at 12:09:45PM -0600, Jacob Shin wrote:
> I think I meant to say as in uu_msgs[] above. HWA stands for "Hardware
> Assertion" ..

Yep, I got it. You mean this:

const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };

and HWA is bit setting 0x2. You don't need to add a comment to say what
HWA means because I'm assuming the BKDG will have it when it comes out.

Thanks.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-18 17:19   ` Borislav Petkov
  2012-12-18 17:30     ` Jacob Shin
@ 2012-12-18 18:24     ` Joe Perches
  2012-12-18 18:33       ` Borislav Petkov
  1 sibling, 1 reply; 25+ messages in thread
From: Joe Perches @ 2012-12-18 18:24 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Jacob Shin, Doug Thompson, linux-edac, linux-kernel

On Tue, 2012-12-18 at 18:19 +0100, Borislav Petkov wrote:
> On Mon, Dec 17, 2012 at 01:39:48PM -0600, Jacob Shin wrote:
> > Add MCE decoding logic for AMD Family 16h processors.
[]
> > +		switch (xec) {
> > +		case 0x04 ... 0x05:
> > +			pr_cont("Parity error in %s.\n",
> > +				(r4 == R4_RD ? "IBUFF" : "OBUFF"));
> 
> or
> 			pr_cont("%sBUFF parity error.\n", ((xec == 4) ? "I" : "O"));

or without all the unnecessary parens and using char:

			pr_cont("%cBUFF parity error\n", xec == 4 ? 'I' : 'O');



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-18 18:24     ` Joe Perches
@ 2012-12-18 18:33       ` Borislav Petkov
  2012-12-18 18:37         ` Joe Perches
  0 siblings, 1 reply; 25+ messages in thread
From: Borislav Petkov @ 2012-12-18 18:33 UTC (permalink / raw)
  To: Joe Perches; +Cc: Jacob Shin, Doug Thompson, linux-edac, linux-kernel

On Tue, Dec 18, 2012 at 10:24:29AM -0800, Joe Perches wrote:
> or without all the unnecessary parens and using char:
> 
> 			pr_cont("%cBUFF parity error\n", xec == 4 ? 'I' : 'O');

Char is fine, the parens make this more readable when you look at it
again after a couple of months :-).

Thanks.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-18 18:33       ` Borislav Petkov
@ 2012-12-18 18:37         ` Joe Perches
  2012-12-18 19:00           ` Mauro Carvalho Chehab
  0 siblings, 1 reply; 25+ messages in thread
From: Joe Perches @ 2012-12-18 18:37 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Jacob Shin, Doug Thompson, linux-edac, linux-kernel

On Tue, 2012-12-18 at 19:33 +0100, Borislav Petkov wrote:
> On Tue, Dec 18, 2012 at 10:24:29AM -0800, Joe Perches wrote:
> > or without all the unnecessary parens and using char:
> > 
> > 			pr_cont("%cBUFF parity error\n", xec == 4 ? 'I' : 'O');
> 
> Char is fine, the parens make this more readable when you look at it
> again after a couple of months :-).

<shrug>

I think the unnecessary parens make it _less_ readable
because you need to parse more stuff and it's easy to
misplace those parens.

cheers, Joe


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-18 18:37         ` Joe Perches
@ 2012-12-18 19:00           ` Mauro Carvalho Chehab
  0 siblings, 0 replies; 25+ messages in thread
From: Mauro Carvalho Chehab @ 2012-12-18 19:00 UTC (permalink / raw)
  To: Joe Perches
  Cc: Borislav Petkov, Jacob Shin, Doug Thompson, linux-edac,
	linux-kernel

Em 18-12-2012 16:37, Joe Perches escreveu:
> On Tue, 2012-12-18 at 19:33 +0100, Borislav Petkov wrote:
>> On Tue, Dec 18, 2012 at 10:24:29AM -0800, Joe Perches wrote:
>>> or without all the unnecessary parens and using char:
>>>
>>> 			pr_cont("%cBUFF parity error\n", xec == 4 ? 'I' : 'O');
>>
>> Char is fine, the parens make this more readable when you look at it
>> again after a couple of months :-).
>
> <shrug>
>
> I think the unnecessary parens make it _less_ readable
> because you need to parse more stuff and it's easy to
> misplace those parens.

I agree with Boris: the parenthesis at the test (xec == 4) makes it
easier to read, IMO.

Regards,
Mauro

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  2012-12-17 19:39 [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well Jacob Shin
  2012-12-17 19:39 ` [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h Jacob Shin
@ 2012-12-17 19:57 ` Joe Perches
  2012-12-17 20:05   ` Borislav Petkov
  1 sibling, 1 reply; 25+ messages in thread
From: Joe Perches @ 2012-12-17 19:57 UTC (permalink / raw)
  To: Jacob Shin; +Cc: Borislav Petkov, Doug Thompson, linux-edac, linux-kernel

On Mon, 2012-12-17 at 13:39 -0600, Jacob Shin wrote:
> Currently only AMD Family 15h processors have special handling for MC2
> errors, since upcoming Family 16h will also need unique handling,
> let's make MC2 handling part of amd_decoder_ops.

[]

> diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
[]
> @@ -399,12 +399,9 @@ static void decode_mc1_mce(struct mce *m)
>  		pr_emerg(HW_ERR "Corrupted MC1 MCE info?\n");
>  }
>  
> -static void decode_mc2_mce(struct mce *m)
> +static bool k8_mc2_mce(u16 ec, u8 xec)
>  {
> -	u16 ec = EC(m->status);
> -	u8 xec = XEC(m->status, xec_mask);
> -
> -	pr_emerg(HW_ERR "MC2 Error");
> +	bool ret = true;
>  
>  	if (xec == 0x1)
>  		pr_cont(" in the write data buffers.\n");

It'd be better to change the pr_cont uses to pr_emerg

[]

> +static void decode_mc2_mce(struct mce *m)
> +{
> +	u16 ec = EC(m->status);
> +	u8 xec = XEC(m->status, xec_mask);
> +
> +	pr_emerg(HW_ERR "MC2 Error: ");

Remove this and

> +	if (fam_ops->mc2_mce(ec, xec))
> +		;
> +	else
> +		pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
>  }

And make this

	if (!fam_ops->mc2_mce(ec, xec))
		pr_emerg(etc...);



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  2012-12-17 19:57 ` [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well Joe Perches
@ 2012-12-17 20:05   ` Borislav Petkov
  2012-12-17 20:16     ` Joe Perches
  0 siblings, 1 reply; 25+ messages in thread
From: Borislav Petkov @ 2012-12-17 20:05 UTC (permalink / raw)
  To: Joe Perches; +Cc: Jacob Shin, Doug Thompson, linux-edac, linux-kernel

On Mon, Dec 17, 2012 at 11:57:01AM -0800, Joe Perches wrote:
> It'd be better to change the pr_cont uses to pr_emerg
> 
> []
> 
> > +static void decode_mc2_mce(struct mce *m)
> > +{
> > +	u16 ec = EC(m->status);
> > +	u8 xec = XEC(m->status, xec_mask);
> > +
> > +	pr_emerg(HW_ERR "MC2 Error: ");
> 
> Remove this and
> 
> > +	if (fam_ops->mc2_mce(ec, xec))
> > +		;
> > +	else
> > +		pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
> >  }
> 
> And make this
> 
> 	if (!fam_ops->mc2_mce(ec, xec))
> 		pr_emerg(etc...);

No, this is not how we do this here. We do pr_emerg in the main per-bank
function, i.e. mc0, mc1, mc2... and we finish the line in the respective
function with pr_cont.

If your fear is line interleaving, then this shouldn't happen in most
cases because we're in atomic #MC context and nothing else is executing
in that case. (And I haven't seen it interleave in all my testing so
far).

If it does interleave when we have a non-critical error detected and
reported in process context, then this whole decoding code needs a lot
more work than this.

Thanks.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  2012-12-17 20:05   ` Borislav Petkov
@ 2012-12-17 20:16     ` Joe Perches
  2012-12-17 20:22       ` Borislav Petkov
  0 siblings, 1 reply; 25+ messages in thread
From: Joe Perches @ 2012-12-17 20:16 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Jacob Shin, Doug Thompson, linux-edac, linux-kernel

On Mon, 2012-12-17 at 21:05 +0100, Borislav Petkov wrote:
> On Mon, Dec 17, 2012 at 11:57:01AM -0800, Joe Perches wrote:
> > It'd be better to change the pr_cont uses to pr_emerg
> > 
> > []
> > 
> > > +static void decode_mc2_mce(struct mce *m)
> > > +{
> > > +	u16 ec = EC(m->status);
> > > +	u8 xec = XEC(m->status, xec_mask);
> > > +
> > > +	pr_emerg(HW_ERR "MC2 Error: ");

> > 
> > Remove this and
> > 
> > > +	if (fam_ops->mc2_mce(ec, xec))
> > > +		;
> > > +	else
> > > +		pr_emerg(HW_ERR "Corrupted MC2 MCE info?\n");
> > >  }
> > 
> > And make this
> > 
> > 	if (!fam_ops->mc2_mce(ec, xec))
> > 		pr_emerg(etc...);
> 
> No, this is not how we do this here. We do pr_emerg in the main per-bank
> function, i.e. mc0, mc1, mc2... and we finish the line in the respective
> function with pr_cont.
> 
> If your fear is line interleaving, then this shouldn't happen in most
> cases because we're in atomic #MC context and nothing else is executing
> in that case. (And I haven't seen it interleave in all my testing so
> far).
> 
> If it does interleave when we have a non-critical error detected and
> reported in process context, then this whole decoding code needs a lot
> more work than this.
> 

You also have paths where you start a pr_emerg and do not terminate
it when there is no apparent error at all.



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  2012-12-17 20:16     ` Joe Perches
@ 2012-12-17 20:22       ` Borislav Petkov
  2012-12-17 20:34         ` Joe Perches
  0 siblings, 1 reply; 25+ messages in thread
From: Borislav Petkov @ 2012-12-17 20:22 UTC (permalink / raw)
  To: Joe Perches; +Cc: Jacob Shin, Doug Thompson, linux-edac, linux-kernel

On Mon, Dec 17, 2012 at 12:16:16PM -0800, Joe Perches wrote:
> You also have paths where you start a pr_emerg and do not terminate it
> when there is no apparent error at all.

For example?

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  2012-12-17 20:22       ` Borislav Petkov
@ 2012-12-17 20:34         ` Joe Perches
  2012-12-17 20:40           ` Borislav Petkov
  0 siblings, 1 reply; 25+ messages in thread
From: Joe Perches @ 2012-12-17 20:34 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Jacob Shin, Doug Thompson, linux-edac, linux-kernel

On Mon, 2012-12-17 at 21:22 +0100, Borislav Petkov wrote:
> On Mon, Dec 17, 2012 at 12:16:16PM -0800, Joe Perches wrote:
> > You also have paths where you start a pr_emerg and do not terminate it
> > when there is no apparent error at all.
> 
> For example?
> 

decode_mc2_mce does

	pr_emerg(HW_ERR "MC2 Error: ");
calls
	fam_ops->mc2_mce()
which may not emit anything
and if it doesn't, does
	pr_emerg(HW_ERR "Corrupted ...");
on another line leaving a trailing colon without
newline from the pr_emerg above.

I believe the second pr_emerg should be pr_cont

cheers, Joe


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  2012-12-17 20:34         ` Joe Perches
@ 2012-12-17 20:40           ` Borislav Petkov
  2012-12-17 20:43             ` Jacob Shin
  0 siblings, 1 reply; 25+ messages in thread
From: Borislav Petkov @ 2012-12-17 20:40 UTC (permalink / raw)
  To: Joe Perches; +Cc: Jacob Shin, Doug Thompson, linux-edac, linux-kernel

On Mon, Dec 17, 2012 at 12:34:35PM -0800, Joe Perches wrote:
> decode_mc2_mce does
> 
> 	pr_emerg(HW_ERR "MC2 Error: ");
> calls
> 	fam_ops->mc2_mce()
> which may not emit anything
> and if it doesn't, does
> 	pr_emerg(HW_ERR "Corrupted ...");
> on another line leaving a trailing colon without
> newline from the pr_emerg above.

Ok, this is in Jacob's patch AFAICT, I'll review those tomorrow but
thanks for the heads-up.

> I believe the second pr_emerg should be pr_cont

Actually, it should be

pr_emerg(HW_ERR "\nCorrupted...\n");

to finish the initial pr_emerg.

Thanks.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  2012-12-17 20:40           ` Borislav Petkov
@ 2012-12-17 20:43             ` Jacob Shin
  2012-12-17 20:59               ` Joe Perches
  0 siblings, 1 reply; 25+ messages in thread
From: Jacob Shin @ 2012-12-17 20:43 UTC (permalink / raw)
  To: Borislav Petkov, Joe Perches, Doug Thompson, linux-edac,
	linux-kernel

On Mon, Dec 17, 2012 at 09:40:11PM +0100, Borislav Petkov wrote:
> On Mon, Dec 17, 2012 at 12:34:35PM -0800, Joe Perches wrote:
> > decode_mc2_mce does
> > 
> > 	pr_emerg(HW_ERR "MC2 Error: ");
> > calls
> > 	fam_ops->mc2_mce()
> > which may not emit anything
> > and if it doesn't, does
> > 	pr_emerg(HW_ERR "Corrupted ...");
> > on another line leaving a trailing colon without
> > newline from the pr_emerg above.
> 
> Ok, this is in Jacob's patch AFAICT, I'll review those tomorrow but
> thanks for the heads-up.

Okay, I'll wait until your feedback before sending V2

> 
> > I believe the second pr_emerg should be pr_cont
> 
> Actually, it should be
> 
> pr_emerg(HW_ERR "\nCorrupted...\n");
> 
> to finish the initial pr_emerg.

Okay, all of the other decode_mc*_mce functions needs to change then.

Thanks!

> 
> Thanks.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> Sent from a fat crate under my desk. Formatting is fine.
> --
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  2012-12-17 20:43             ` Jacob Shin
@ 2012-12-17 20:59               ` Joe Perches
  2012-12-17 21:08                 ` Borislav Petkov
  2012-12-17 21:09                 ` Jacob Shin
  0 siblings, 2 replies; 25+ messages in thread
From: Joe Perches @ 2012-12-17 20:59 UTC (permalink / raw)
  To: Jacob Shin; +Cc: Borislav Petkov, Doug Thompson, linux-edac, linux-kernel

On Mon, 2012-12-17 at 14:43 -0600, Jacob Shin wrote:
> On Mon, Dec 17, 2012 at 09:40:11PM +0100, Borislav Petkov wrote:
> > On Mon, Dec 17, 2012 at 12:34:35PM -0800, Joe Perches wrote:
> > > decode_mc2_mce does
> > > 
> > > 	pr_emerg(HW_ERR "MC2 Error: ");
> > > calls
> > > 	fam_ops->mc2_mce()
> > > which may not emit anything
> > > and if it doesn't, does
> > > 	pr_emerg(HW_ERR "Corrupted ...");
> > > on another line leaving a trailing colon without
> > > newline from the pr_emerg above.
> > 
> > Ok, this is in Jacob's patch AFAICT, I'll review those tomorrow but
> > thanks for the heads-up.
> 
> Okay, I'll wait until your feedback before sending V2
> 
> > 
> > > I believe the second pr_emerg should be pr_cont
> > 
> > Actually, it should be
> > 
> > pr_emerg(HW_ERR "\nCorrupted...\n");
> > 
> > to finish the initial pr_emerg.
> 
> Okay, all of the other decode_mc*_mce functions needs to change then.

Hi Jacob.

I don't think that's necessary.

Using any pr_<level>("\n...") is just not good form.
I think the pr_cont would be fine, it's the same
form as all the other decoded continuations.

The new pr_emerg is effectively a termination of the
previous one.  There's no need for 2 consecutive uses
of pr_emerg(HW_ERR ...) here.

cheers, Joe


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  2012-12-17 20:59               ` Joe Perches
@ 2012-12-17 21:08                 ` Borislav Petkov
  2012-12-17 21:11                   ` Jacob Shin
  2012-12-17 21:09                 ` Jacob Shin
  1 sibling, 1 reply; 25+ messages in thread
From: Borislav Petkov @ 2012-12-17 21:08 UTC (permalink / raw)
  To: Jacob Shin; +Cc: Joe Perches, Doug Thompson, linux-edac, linux-kernel

On Mon, Dec 17, 2012 at 12:59:00PM -0800, Joe Perches wrote:
> The new pr_emerg is effectively a termination of the
> previous one.  There's no need for 2 consecutive uses
> of pr_emerg(HW_ERR ...) here.

Actually, in decode_mc5_mce(), the second pr_emerg needs to stay like
what it is now.

So Jacob, please leave those changes out, I'll sort them out later.

Thanks.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  2012-12-17 21:08                 ` Borislav Petkov
@ 2012-12-17 21:11                   ` Jacob Shin
  0 siblings, 0 replies; 25+ messages in thread
From: Jacob Shin @ 2012-12-17 21:11 UTC (permalink / raw)
  To: Borislav Petkov, Joe Perches, Doug Thompson, linux-edac,
	linux-kernel

On Mon, Dec 17, 2012 at 10:08:02PM +0100, Borislav Petkov wrote:
> On Mon, Dec 17, 2012 at 12:59:00PM -0800, Joe Perches wrote:
> > The new pr_emerg is effectively a termination of the
> > previous one.  There's no need for 2 consecutive uses
> > of pr_emerg(HW_ERR ...) here.
> 
> Actually, in decode_mc5_mce(), the second pr_emerg needs to stay like
> what it is now.
> 
> So Jacob, please leave those changes out, I'll sort them out later.

Okay, will do, I'll change fix it in my changes then. But I'll wait
for your feedback tomorrow, before sending out V2.

Thanks!

-Jacob

> 
> Thanks.
> 
> -- 
> Regards/Gruss,
>     Boris.
> 
> Sent from a fat crate under my desk. Formatting is fine.
> --
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  2012-12-17 20:59               ` Joe Perches
  2012-12-17 21:08                 ` Borislav Petkov
@ 2012-12-17 21:09                 ` Jacob Shin
  1 sibling, 0 replies; 25+ messages in thread
From: Jacob Shin @ 2012-12-17 21:09 UTC (permalink / raw)
  To: Joe Perches; +Cc: Borislav Petkov, Doug Thompson, linux-edac, linux-kernel

On Mon, Dec 17, 2012 at 12:59:00PM -0800, Joe Perches wrote:
> On Mon, 2012-12-17 at 14:43 -0600, Jacob Shin wrote:
> > On Mon, Dec 17, 2012 at 09:40:11PM +0100, Borislav Petkov wrote:
> > > On Mon, Dec 17, 2012 at 12:34:35PM -0800, Joe Perches wrote:
> > > > decode_mc2_mce does
> > > > 
> > > > 	pr_emerg(HW_ERR "MC2 Error: ");
> > > > calls
> > > > 	fam_ops->mc2_mce()
> > > > which may not emit anything
> > > > and if it doesn't, does
> > > > 	pr_emerg(HW_ERR "Corrupted ...");
> > > > on another line leaving a trailing colon without
> > > > newline from the pr_emerg above.
> > > 
> > > Ok, this is in Jacob's patch AFAICT, I'll review those tomorrow but
> > > thanks for the heads-up.
> > 
> > Okay, I'll wait until your feedback before sending V2
> > 
> > > 
> > > > I believe the second pr_emerg should be pr_cont
> > > 
> > > Actually, it should be
> > > 
> > > pr_emerg(HW_ERR "\nCorrupted...\n");
> > > 
> > > to finish the initial pr_emerg.
> > 
> > Okay, all of the other decode_mc*_mce functions needs to change then.
> 
> Hi Jacob.
> 
> I don't think that's necessary.
> 
> Using any pr_<level>("\n...") is just not good form.
> I think the pr_cont would be fine, it's the same
> form as all the other decoded continuations.
> 
> The new pr_emerg is effectively a termination of the
> previous one.  There's no need for 2 consecutive uses
> of pr_emerg(HW_ERR ...) here.

Ah, okay, I'll use pr_cont. Thanks!

-Jacob

> 
> cheers, Joe
> 
> 


^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH V2 0/2] MCE, AMD: MCE decoding support for AMD Family 16h
@ 2012-12-18 21:06 Jacob Shin
  2012-12-18 21:06 ` [PATCH 2/2] " Jacob Shin
  0 siblings, 1 reply; 25+ messages in thread
From: Jacob Shin @ 2012-12-18 21:06 UTC (permalink / raw)
  To: Borislav Petkov, Doug Thompson; +Cc: linux-edac, linux-kernel, Jacob Shin

The following patchset enables MCE decoding support for AMD Family 16h
processors.

Changes in V2:
* Changed if/else style and pr_cont usage per feedback from:
  https://lkml.org/lkml/2012/12/17/365

* Merged f14h and f16h mc0 and mc2 decoding into common function per
  feedback from: https://lkml.org/lkml/2012/12/18/269

* Fixed typo in INT_ERROR decoding.

Jacob Shin (2):
  MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well
  MCE, AMD: MCE decoding support for AMD Family 16h

 drivers/edac/mce_amd.c |  139 +++++++++++++++++++++++++++++++++++-------------
 drivers/edac/mce_amd.h |    4 ++
 2 files changed, 105 insertions(+), 38 deletions(-)

-- 
1.7.9.5



^ permalink raw reply	[flat|nested] 25+ messages in thread

* [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-18 21:06 [PATCH V2 0/2] MCE, AMD: MCE decoding support for AMD Family 16h Jacob Shin
@ 2012-12-18 21:06 ` Jacob Shin
  2012-12-18 22:10   ` Joe Perches
  0 siblings, 1 reply; 25+ messages in thread
From: Jacob Shin @ 2012-12-18 21:06 UTC (permalink / raw)
  To: Borislav Petkov, Doug Thompson; +Cc: linux-edac, linux-kernel, Jacob Shin

Add MCE decoding logic for AMD Family 16h processors.

Signed-off-by: Jacob Shin <jacob.shin@amd.com>
---
 drivers/edac/mce_amd.c |   83 +++++++++++++++++++++++++++++++++++++++++-------
 drivers/edac/mce_amd.h |    3 ++
 2 files changed, 75 insertions(+), 11 deletions(-)

diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index e4752be..ce4c8b7 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -64,6 +64,10 @@ EXPORT_SYMBOL_GPL(to_msgs);
 const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
 EXPORT_SYMBOL_GPL(ii_msgs);
 
+/* internal error type */
+const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
+EXPORT_SYMBOL_GPL(uu_msgs);
+
 static const char * const f15h_mc1_mce_desc[] = {
 	"UC during a demand linefill from L2",
 	"Parity error during data load from IC",
@@ -176,7 +180,7 @@ static bool k8_mc0_mce(u16 ec, u8 xec)
 	return f10h_mc0_mce(ec, xec);
 }
 
-static bool f14h_mc0_mce(u16 ec, u8 xec)
+static bool cat_mc0_mce(u16 ec, u8 xec)
 {
 	u8 r4	 = R4(ec);
 	bool ret = true;
@@ -330,22 +334,27 @@ static bool k8_mc1_mce(u16 ec, u8 xec)
 	return ret;
 }
 
-static bool f14h_mc1_mce(u16 ec, u8 xec)
+static bool cat_mc1_mce(u16 ec, u8 xec)
 {
 	u8 r4    = R4(ec);
 	bool ret = true;
 
 	if (MEM_ERROR(ec)) {
-		if (TT(ec) != 0 || LL(ec) != 1)
+		if (TT(ec) != TT_INSTR)
 			ret = false;
-
-		if (r4 == R4_IRD)
+		else if (r4 == R4_IRD)
 			pr_cont("Data/tag array parity error for a tag hit.\n");
 		else if (r4 == R4_SNOOP)
 			pr_cont("Tag error during snoop/victimization.\n");
+		else if (xec == 0x0)
+			pr_cont("Tag parity error from victim castout.\n");
+		else if (xec == 0x2)
+			pr_cont("Microcode patch RAM parity error.\n");
 		else
 			ret = false;
-	}
+	} else
+		ret = false;
+
 	return ret;
 }
 
@@ -469,6 +478,47 @@ static bool f15h_mc2_mce(u16 ec, u8 xec)
 	return ret;
 }
 
+static bool f16h_mc2_mce(u16 ec, u8 xec)
+{
+	u8 r4 = R4(ec);
+
+	if (!MEM_ERROR(ec))
+		return false;
+
+	switch (xec) {
+	case 0x04 ... 0x05:
+		pr_cont("%cBUFF parity error.\n", (r4 == R4_RD) ? 'I' : 'O');
+		break;
+
+	case 0x09 ... 0x0b:
+	case 0x0d ... 0x0f:
+		pr_cont("ECC error in L2 tag (%s).\n",
+			((r4 == R4_GEN)   ? "BankReq" :
+			((r4 == R4_SNOOP) ? "Prb"     : "Fill")));
+		break;
+
+	case 0x10 ... 0x19:
+	case 0x1b:
+		pr_cont("ECC error in L2 data array (%s).\n",
+			(((r4 == R4_RD) && !(xec & 0x3)) ? "Hit"  :
+			((r4 == R4_GEN)   ? "Attr" :
+			((r4 == R4_EVICT) ? "Vict" : "Fill"))));
+		break;
+
+	case 0x1c ... 0x1d:
+	case 0x1f:
+		pr_cont("Parity error in L2 attribute bits (%s).\n",
+			((r4 == R4_RD)  ? "Hit"  :
+			((r4 == R4_GEN) ? "Attr" : "Fill")));
+		break;
+
+	default:
+		return false;
+	}
+
+	return true;
+}
+
 static void decode_mc2_mce(struct mce *m)
 {
 	u16 ec = EC(m->status);
@@ -546,7 +596,7 @@ static void decode_mc4_mce(struct mce *m)
 		return;
 
 	case 0x19:
-		if (boot_cpu_data.x86 == 0x15)
+		if (boot_cpu_data.x86 == 0x15 || boot_cpu_data.x86 == 0x16)
 			pr_cont("Compute Unit Data Error.\n");
 		else
 			goto wrong_mc4_mce;
@@ -632,6 +682,10 @@ static void decode_mc6_mce(struct mce *m)
 
 static inline void amd_decode_err_code(u16 ec)
 {
+	if (INT_ERROR(ec)) {
+		pr_emerg(HW_ERR "internal: %s\n", UU_MSG(ec));
+		return;
+	}
 
 	pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
 
@@ -736,7 +790,7 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
 		((m->status & MCI_STATUS_PCC)	? "PCC"	  : "-"),
 		((m->status & MCI_STATUS_ADDRV)	? "AddrV" : "-"));
 
-	if (c->x86 == 0x15)
+	if (c->x86 == 0x15 || c->x86 == 0x16)
 		pr_cont("|%s|%s",
 			((m->status & MCI_STATUS_DEFERRED) ? "Deferred" : "-"),
 			((m->status & MCI_STATUS_POISON)   ? "Poison"   : "-"));
@@ -768,7 +822,7 @@ static int __init mce_amd_init(void)
 	if (c->x86_vendor != X86_VENDOR_AMD)
 		return 0;
 
-	if (c->x86 < 0xf || c->x86 > 0x15)
+	if (c->x86 < 0xf || c->x86 > 0x16)
 		return 0;
 
 	fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
@@ -802,8 +856,8 @@ static int __init mce_amd_init(void)
 
 	case 0x14:
 		nb_err_cpumask  = 0x3;
-		fam_ops->mc0_mce = f14h_mc0_mce;
-		fam_ops->mc1_mce = f14h_mc1_mce;
+		fam_ops->mc0_mce = cat_mc0_mce;
+		fam_ops->mc1_mce = cat_mc1_mce;
 		fam_ops->mc2_mce = k8_mc2_mce;
 		break;
 
@@ -814,6 +868,13 @@ static int __init mce_amd_init(void)
 		fam_ops->mc2_mce = f15h_mc2_mce;
 		break;
 
+	case 0x16:
+		xec_mask = 0x1f;
+		fam_ops->mc0_mce = cat_mc0_mce;
+		fam_ops->mc1_mce = cat_mc1_mce;
+		fam_ops->mc2_mce = f16h_mc2_mce;
+		break;
+
 	default:
 		printk(KERN_WARNING "Huh? What family is it: 0x%x?!\n", c->x86);
 		kfree(fam_ops);
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h
index 000f6e2..2828908 100644
--- a/drivers/edac/mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -14,6 +14,7 @@
 #define TLB_ERROR(x)			(((x) & 0xFFF0) == 0x0010)
 #define MEM_ERROR(x)			(((x) & 0xFF00) == 0x0100)
 #define BUS_ERROR(x)			(((x) & 0xF800) == 0x0800)
+#define INT_ERROR(x)			(((x) & 0xF4FF) == 0x0400)
 
 #define TT(x)				(((x) >> 2) & 0x3)
 #define TT_MSG(x)			tt_msgs[TT(x)]
@@ -25,6 +26,8 @@
 #define TO_MSG(x)			to_msgs[TO(x)]
 #define PP(x)				(((x) >> 9) & 0x3)
 #define PP_MSG(x)			pp_msgs[PP(x)]
+#define UU(x)				(((x) >> 8) & 0x3)
+#define UU_MSG(x)			uu_msgs[UU(x)]
 
 #define R4(x)				(((x) >> 4) & 0xf)
 #define R4_MSG(x)			((R4(x) < 9) ?  rrrr_msgs[R4(x)] : "Wrong R4!")
-- 
1.7.9.5



^ permalink raw reply related	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-18 21:06 ` [PATCH 2/2] " Jacob Shin
@ 2012-12-18 22:10   ` Joe Perches
  2012-12-18 22:56     ` Borislav Petkov
  0 siblings, 1 reply; 25+ messages in thread
From: Joe Perches @ 2012-12-18 22:10 UTC (permalink / raw)
  To: Jacob Shin; +Cc: Borislav Petkov, Doug Thompson, linux-edac, linux-kernel

On Tue, 2012-12-18 at 15:06 -0600, Jacob Shin wrote:
> Add MCE decoding logic for AMD Family 16h processors.

More trivia:

> diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
[]
> @@ -64,6 +64,10 @@ EXPORT_SYMBOL_GPL(to_msgs);
>  const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
>  EXPORT_SYMBOL_GPL(ii_msgs);
>  
> +/* internal error type */
> +const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
> +EXPORT_SYMBOL_GPL(uu_msgs);

Do these really need to be exported?
These don't seem to be used outside of mce_amd.

If these really need export, are these very good names?
Perhaps these should be prefixed with something?



^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-18 22:10   ` Joe Perches
@ 2012-12-18 22:56     ` Borislav Petkov
  2012-12-20 19:20       ` Borislav Petkov
  0 siblings, 1 reply; 25+ messages in thread
From: Borislav Petkov @ 2012-12-18 22:56 UTC (permalink / raw)
  To: Joe Perches; +Cc: Jacob Shin, Doug Thompson, linux-edac, linux-kernel

On Tue, Dec 18, 2012 at 02:10:59PM -0800, Joe Perches wrote:
> On Tue, 2012-12-18 at 15:06 -0600, Jacob Shin wrote:
> > Add MCE decoding logic for AMD Family 16h processors.
> 
> More trivia:
> 
> > diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
> []
> > @@ -64,6 +64,10 @@ EXPORT_SYMBOL_GPL(to_msgs);
> >  const char * const ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
> >  EXPORT_SYMBOL_GPL(ii_msgs);
> >  
> > +/* internal error type */
> > +const char * const uu_msgs[] = { "RESV", "RESV", "HWA", "RESV" };
> > +EXPORT_SYMBOL_GPL(uu_msgs);
> 
> Do these really need to be exported?
> These don't seem to be used outside of mce_amd.
> 
> If these really need export, are these very good names?
> Perhaps these should be prefixed with something?

Hmm, some of those were used in amd64_edac at some point but then the
whole decoding code got concentrated into mce_amd.c but the exports
remained.

PP() and thus pp_msgs is still used there, for example.

So, if you'd like, you could wait until I apply Jacob's patches and push
that git branch to kernel.org and then write a patch ontop removing all
the exports except pp_msgs (you could add them to the "amd_" namespace
by calling it amd_pp_msgs or something) test that patch and send it to
me.

Thanks.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h
  2012-12-18 22:56     ` Borislav Petkov
@ 2012-12-20 19:20       ` Borislav Petkov
  0 siblings, 0 replies; 25+ messages in thread
From: Borislav Petkov @ 2012-12-20 19:20 UTC (permalink / raw)
  To: Joe Perches; +Cc: Jacob Shin, Doug Thompson, linux-edac, linux-kernel

On Tue, Dec 18, 2012 at 11:56:10PM +0100, Borislav Petkov wrote:
> So, if you'd like, you could wait until I apply Jacob's patches and
> push that git branch to kernel.org and then write a patch ontop
> removing all the exports except pp_msgs (you could add them to the
> "amd_" namespace by calling it amd_pp_msgs or something) test that
> patch and send it to me.

Ok, I've applied Jacob's stuff. You can do your patch ontop of

git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp.git edac-for-3.9

and send it to me.

Thanks.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2012-12-20 19:20 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-12-17 19:39 [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well Jacob Shin
2012-12-17 19:39 ` [PATCH 2/2] MCE, AMD: MCE decoding support for AMD Family 16h Jacob Shin
2012-12-18 17:19   ` Borislav Petkov
2012-12-18 17:30     ` Jacob Shin
2012-12-18 18:09       ` Jacob Shin
2012-12-18 18:32         ` Borislav Petkov
2012-12-18 18:24     ` Joe Perches
2012-12-18 18:33       ` Borislav Petkov
2012-12-18 18:37         ` Joe Perches
2012-12-18 19:00           ` Mauro Carvalho Chehab
2012-12-17 19:57 ` [PATCH 1/2] MCE, AMD: Make MC2 decoding part of amd_decoder_ops as well Joe Perches
2012-12-17 20:05   ` Borislav Petkov
2012-12-17 20:16     ` Joe Perches
2012-12-17 20:22       ` Borislav Petkov
2012-12-17 20:34         ` Joe Perches
2012-12-17 20:40           ` Borislav Petkov
2012-12-17 20:43             ` Jacob Shin
2012-12-17 20:59               ` Joe Perches
2012-12-17 21:08                 ` Borislav Petkov
2012-12-17 21:11                   ` Jacob Shin
2012-12-17 21:09                 ` Jacob Shin
  -- strict thread matches above, loose matches on Subject: below --
2012-12-18 21:06 [PATCH V2 0/2] MCE, AMD: MCE decoding support for AMD Family 16h Jacob Shin
2012-12-18 21:06 ` [PATCH 2/2] " Jacob Shin
2012-12-18 22:10   ` Joe Perches
2012-12-18 22:56     ` Borislav Petkov
2012-12-20 19:20       ` Borislav Petkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox