public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Borislav Petkov <borislav.petkov@amd.com>
To: akpm@linux-foundation.org, greg@kroah.com
Cc: mingo@elte.hu, tglx@linutronix.de, hpa@zytor.com,
	dougthompson@xmission.com, <linux-kernel@vger.kernel.org>,
	Borislav Petkov <borislav.petkov@amd.com>
Subject: [PATCH 18/21] amd64_edac: add ECC reporting initializers
Date: Wed, 29 Apr 2009 18:55:04 +0200	[thread overview]
Message-ID: <1241024107-14535-19-git-send-email-borislav.petkov@amd.com> (raw)
In-Reply-To: <1241024107-14535-1-git-send-email-borislav.petkov@amd.com>

From: Doug Thompson <dougthompson@xmission.com>

Signed-off-by: Doug Thompson <dougthompson@xmission.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
 drivers/edac/amd64_edac.c |  242 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 242 insertions(+), 0 deletions(-)

diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 8cf8060..43f236d 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -70,6 +70,7 @@
 #include <linux/slab.h>
 #include <linux/mmzone.h>
 #include <linux/edac.h>
+#include <asm/msr.h>
 #include "edac_core.h"
 
 #define amd64_printk(level, fmt, arg...) \
@@ -4165,3 +4166,244 @@ static int amd64_init_csrows(struct mem_ctl_info *mci)
 	return empty;
 }
 
+/*
+ * amd64_enable_ecc_error_reporting
+ *
+ *	Only if 'ecc_enable_override' is set AND BIOS had ECC disabled,
+ *	do "we" enable it.
+ *
+ *	On each NB we need to enable the hardware to
+ *	generate and detect error events
+ *
+ *	1) NB Control Register
+ *	2) Global MCE Reporting Control Reg (MCGCTL)
+ */
+static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
+{
+	struct amd64_pvt *pvt = mci->pvt_info;
+	const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
+	int idx = 0, cpu, err;
+	int cpus_on_node = cpumask_weight(cpumask);
+	u32 mcgctl_l[cpus_on_node], mcgctl_h[cpus_on_node];
+	u32 value;
+	u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+
+	if (!ecc_enable_override)
+		return;
+
+	amd64_printk(KERN_WARNING,
+		"'ecc_enable_override' parameter is active, "
+		"Enabling AMD ECC hardware now: CAUTION\n");
+
+	/* 1) read the NB Control register, and save old Enable bits */
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
+	if (err != 0)
+		debugf0("%s() Reading K8_NBCTL failed\n", __func__);
+
+	/* save old value and then turn on UECCn and CECCEn bits
+	 * and write it back out, thus turning ON ECC for sure
+	 */
+	pvt->old_nbctl = value & mask;
+	pvt->nbctl_mcgctl_saved = 1;	/* Mark 'old' ECC values valid */
+
+	value |= mask;
+	pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+
+	debugf0("%s() Old NBCTL 0x%x New NBCTL= 0x%x\n",
+		__func__, pvt->old_nbctl, value);
+
+	/* 2) Read and save the NB Enable bit at entry. Enable the bit
+	 * then write the enabled value back to hardware
+	 */
+	rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, mcgctl_l, mcgctl_h);
+
+	for_each_cpu(cpu, cpumask) {
+		pvt->old_mcgctl[idx] = mcgctl_l[idx] & K8_MSR_MCGCTL_NBE;
+		mcgctl_l[idx] |= K8_MSR_MCGCTL_NBE;
+
+		debugf0("%s(), cpu %d, Old MCGCTL[NBE] = 0x%x New MCGCTL=0x%x\n",
+			__func__, cpu, (unsigned int) pvt->old_mcgctl[idx],
+			(unsigned int) mcgctl_l[idx]);
+
+		idx++;
+	}
+	wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, mcgctl_l, mcgctl_h);
+
+	/* 3) Read the NB CFG to ensure DRAM ECC is on and then
+	 * keep a copy of the hw register in the control structure
+	 */
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+	if (err != 0)
+		debugf0("%s() Reading K8_NBCFG failed\n", __func__);
+
+	debugf0("%s() NBCFG(1)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n",
+		__func__, value,
+		value & (K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+		value & (K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"
+		);
+
+	if (!(value & K8_NBCFG_ECC_ENABLE)) {
+		amd64_printk(KERN_WARNING,
+			"This node reports that DRAM ECC is "
+			"currently Disabled; ENABLING now\n");
+
+		/* Attempt to turn on DRAM ECC Enable */
+		value |= K8_NBCFG_ECC_ENABLE;
+		pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value);
+
+		err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+		if (err != 0)
+			debugf0("%s() Reading K8_NBCFG failed\n", __func__);
+
+		if (!(value & K8_NBCFG_ECC_ENABLE)) {
+			amd64_printk(KERN_WARNING,
+				"Hardware rejects Enabling DRAM ECC checking\n"
+				"Check memory DIMM configuration\n");
+		} else {
+			amd64_printk(KERN_DEBUG,
+				"Hardware accepted DRAM ECC Enable\n");
+		}
+	}
+	debugf0("%s() NBCFG(2)= 0x%x  CHIPKILL= %s ECC_ENABLE= %s\n",
+		__func__, value,
+		(value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
+		(value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"
+		);
+
+	pvt->ctl_error_info.nbcfg = value;
+}
+
+/*
+ * amd64_restore_ecc_error_reporting
+ *
+ *	restore the hardware registers to their initial condition
+ *	prior to when amd64_enable_ecc_error_reporting was called
+ */
+static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
+{
+	const cpumask_t *cpumask = cpumask_of_node(pvt->mc_node_id);
+	int cpus_on_node = cpumask_weight(cpumask), idx = 0, cpu;
+	u32 mcgctl_l[cpus_on_node], mcgctl_h[cpus_on_node];
+	u32 value;
+	u32 mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
+	int err;
+
+	if (!pvt->nbctl_mcgctl_saved)
+		return;
+
+	err = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCTL, &value);
+	if (err != 0)
+		debugf0("%s() Reading K8_NBCTL failed\n", __func__);
+	value &= ~mask;
+	value |= pvt->old_nbctl;
+
+	/* restore the NB Enable MCGCTL bit */
+	pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+
+	rdmsr_on_cpus(cpumask, K8_MSR_MCGCTL, mcgctl_l, mcgctl_h);
+
+	for_each_cpu(cpu, cpumask) {
+		mcgctl_l[idx] &= ~K8_MSR_MCGCTL_NBE;
+		mcgctl_l[idx] |= pvt->old_mcgctl[idx];
+		idx++;
+	}
+
+	wrmsr_on_cpus(cpumask, K8_MSR_MCGCTL, mcgctl_l, mcgctl_h);
+}
+
+static void check_mcg_ctl(void *ret)
+{
+	u64 msr_val = 0;
+	u8 nbe;
+
+	rdmsrl(MSR_IA32_MCG_CTL, msr_val);
+	nbe = msr_val & K8_MSR_MCGCTL_NBE;
+
+	debugf0("%s: core: %u, MCG_CTL: 0x%llx, NB MSR is %s\n",
+		__func__, raw_smp_processor_id(), msr_val,
+		(nbe ? "enabled" : "disabled"));
+
+	if (!nbe)
+		*(int *)ret = 0;
+}
+
+static int amd64_mcg_ctl_enabled_on_cpus(const cpumask_t *mask)
+{
+	int rc = 1;
+	preempt_disable();
+	smp_call_function_many(mask, check_mcg_ctl, &rc, 1);
+	preempt_enable();
+
+	return rc;
+}
+
+/*
+ * amd64_check_ecc_enabled
+ *
+ *	EDAC requires that the BIOS have ECC enabled before taking over the
+ *	processing of ECC errors. This is because the BIOS can properly
+ *	initialize the memory system completely.
+ *
+ *	For development and other purposes, there is a command line option
+ *	which allows for overriding this contraint. If supplied on the kernrel
+ *	command line, hardware ECC is force-enabled later in
+ *	amd64_enable_ecc_error_reporting().
+ */
+static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
+{
+	u32 value;
+	int tmp;
+	int rc = 0;
+
+	tmp = pci_read_config_dword(pvt->misc_f3_ctl, K8_NBCFG, &value);
+	if (tmp != 0)
+		debugf0("%s() Reading K8_NBCTL failed\n", __func__);
+
+	/* check MCG_CTL on all the cpus on this node */
+	rc = amd64_mcg_ctl_enabled_on_cpus(cpumask_of_node(pvt->mc_node_id));
+
+	debugf0("%s() K8_NBCFG=0x%x,  DRAM ECC is %s\n",
+		__func__, value, (value & K8_NBCFG_ECC_ENABLE ? "enabled"
+					: "disabled"));
+	if (!tmp || !rc) {
+		if (!tmp) {
+			amd64_printk(KERN_WARNING, "This node reports that "
+						   "Memory ECC is currently "
+						   "disabled.\n");
+
+			amd64_printk(KERN_WARNING, "bit 0x%lx in register "
+				"F3x%x of the MISC_CONTROL device (%s) "
+				"should be enabled\n", K8_NBCFG_ECC_ENABLE,
+				K8_NBCFG, pci_name(pvt->misc_f3_ctl));
+		}
+		if (!rc) {
+			amd64_printk(KERN_WARNING, "bit 0x%016lx in MSR 0x%08x "
+					"of node %d should be enabled\n",
+					K8_MSR_MCGCTL_NBE, MSR_IA32_MCG_CTL,
+					pvt->mc_node_id);
+		}
+		if (!ecc_enable_override) {
+			amd64_printk(KERN_WARNING, "WARNING: ECC is NOT "
+				"currently enabled by the BIOS. Module "
+				"will NOT be loaded.\n"
+				"    Either Enable ECC in the BIOS, "
+				"or use the 'ecc_enable_override' "
+				"parameter.\n"
+				"    Might be a BIOS bug, if BIOS says "
+				"ECC is enabled\n"
+				"    Use of the override can cause "
+				"unknown side effects.\n");
+			rc = -ENODEV;
+		}
+	} else {
+		amd64_printk(KERN_INFO,
+			"ECC is enabled by BIOS, Proceeding "
+			"with EDAC module initialization\n");
+
+		/* CLEAR the override, since BIOS controlled it */
+		ecc_enable_override = 0;
+	}
+
+	return rc;
+}
+
-- 
1.6.2.4



  parent reply	other threads:[~2009-04-29 17:01 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-04-29 16:54 [RFC PATCH 00/21 v2] amd64_edac: EDAC module for AMD64 Borislav Petkov
2009-04-29 16:54 ` [PATCH 01/21] x86: add methods for writing of an MSR on several CPUs Borislav Petkov
2009-04-29 17:39   ` H. Peter Anvin
2009-05-04 16:46     ` Borislav Petkov
2009-05-04 17:25       ` H. Peter Anvin
2009-05-04 17:53         ` Borislav Petkov
2009-05-04 20:51           ` Mauro Carvalho Chehab
2009-04-29 16:54 ` [PATCH 02/21] amd64_edac: add PCI config register defines Borislav Petkov
2009-05-04 20:54   ` Mauro Carvalho Chehab
2009-04-29 16:54 ` [PATCH 03/21] amd64_edac: add driver structs Borislav Petkov
2009-05-04 20:38   ` Mauro Carvalho Chehab
2009-04-29 16:54 ` [PATCH 04/21] amd64_edac: add memory scrubber interface Borislav Petkov
2009-05-04 21:02   ` Mauro Carvalho Chehab
2009-04-29 16:54 ` [PATCH 05/21] amd64_edac: add sys addr to memory controller mapping helpers Borislav Petkov
2009-05-04 21:08   ` Mauro Carvalho Chehab
2009-04-29 16:54 ` [PATCH 06/21] amd64_edac: add functionality to compute the DRAM hole Borislav Petkov
2009-05-04 21:22   ` Mauro Carvalho Chehab
2009-04-29 16:54 ` [PATCH 07/21] amd64_edac: add DRAM address type conversion facilities Borislav Petkov
2009-05-04 21:39   ` Mauro Carvalho Chehab
2009-04-29 16:54 ` [PATCH 08/21] amd64_edac: add helper to dump relevant registers Borislav Petkov
2009-05-04 21:43   ` Mauro Carvalho Chehab
2009-04-29 16:54 ` [PATCH 09/21] amd64_edac: assign DRAM chip select base and mask in a family-specific way Borislav Petkov
2009-05-04 21:59   ` Mauro Carvalho Chehab
2009-05-05 10:25     ` Borislav Petkov
2009-04-29 16:54 ` [PATCH 10/21] amd64_edac: add k8-specific methods Borislav Petkov
2009-05-04 22:06   ` Mauro Carvalho Chehab
2009-04-29 16:54 ` [PATCH 11/21] amd64_edac: add f10-and-later methods-p1 Borislav Petkov
2009-05-04 22:10   ` Mauro Carvalho Chehab
2009-04-29 16:54 ` [PATCH 12/21] amd64_edac: add f10-and-later methods-p2 Borislav Petkov
2009-05-04 23:25   ` Mauro Carvalho Chehab
2009-04-29 16:54 ` [PATCH 13/21] amd64_edac: add f10-and-later methods-p3 Borislav Petkov
2009-04-29 18:22   ` Ingo Molnar
2009-04-29 18:24     ` Ingo Molnar
2009-04-29 19:05     ` Andrew Morton
2009-04-29 19:23       ` Ingo Molnar
2009-04-29 19:42         ` Andrew Morton
2009-04-29 19:53           ` Ingo Molnar
2009-04-29 20:47             ` Ingo Molnar
2009-04-30 10:01               ` Borislav Petkov
2009-04-30 10:42                 ` Ingo Molnar
2009-05-04 23:36   ` Mauro Carvalho Chehab
2009-04-29 16:55 ` [PATCH 14/21] amd64_edac: add per-family descriptors Borislav Petkov
2009-05-04 23:39   ` Mauro Carvalho Chehab
2009-04-29 16:55 ` [PATCH 15/21] amd64_edac: add ECC chipkill syndrome mapping table Borislav Petkov
2009-05-04 23:42   ` Mauro Carvalho Chehab
2009-04-29 16:55 ` [PATCH 16/21] amd64_edac: add error decoding logic Borislav Petkov
2009-04-29 18:19   ` Ingo Molnar
2009-05-04 23:48   ` Mauro Carvalho Chehab
2009-04-29 16:55 ` [PATCH 17/21] amd64_edac: add EDAC core-related initializers Borislav Petkov
2009-05-04 23:53   ` Mauro Carvalho Chehab
2009-04-29 16:55 ` Borislav Petkov [this message]
2009-05-04 23:59   ` [PATCH 18/21] amd64_edac: add ECC reporting initializers Mauro Carvalho Chehab
2009-04-29 16:55 ` [PATCH 19/21] amd64_edac: add debugging/testing code Borislav Petkov
2009-04-29 18:18   ` Ingo Molnar
2009-04-29 16:55 ` [PATCH 20/21] amd64_edac: add DRAM error injection logic using sysfs Borislav Petkov
2009-04-29 18:17   ` Ingo Molnar
2009-05-05  0:06   ` Mauro Carvalho Chehab
2009-04-29 16:55 ` [PATCH 21/21] amd64_edac: add module registration routines Borislav Petkov
2009-05-05  0:10   ` Mauro Carvalho Chehab
2009-04-29 19:30 ` [RFC PATCH 00/21 v2] amd64_edac: EDAC module for AMD64 Andi Kleen
2009-04-30 11:57   ` Borislav Petkov
2009-04-30 12:21     ` Ingo Molnar
2009-04-30 12:47     ` Andi Kleen
2009-04-30 14:48       ` Aristeu Rozanski
2009-05-01  7:53         ` Borislav Petkov
2009-05-03  0:32           ` Aristeu Rozanski
2009-04-30 18:37       ` Mauro Carvalho Chehab
2009-05-01 12:39       ` Ingo Molnar
  -- strict thread matches above, loose matches on Subject: below --
2009-04-28 15:05 [RFC PATCH 00/21] " Borislav Petkov
2009-04-28 15:06 ` [PATCH 18/21] amd64_edac: add ECC reporting initializers Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1241024107-14535-19-git-send-email-borislav.petkov@amd.com \
    --to=borislav.petkov@amd.com \
    --cc=akpm@linux-foundation.org \
    --cc=dougthompson@xmission.com \
    --cc=greg@kroah.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=tglx@linutronix.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox