Linux EDAC development
 help / color / mirror / Atom feed
From: Yazen Ghannam <yazen.ghannam@amd.com>
To: <x86@kernel.org>, Tony Luck <tony.luck@intel.com>
Cc: <linux-kernel@vger.kernel.org>, <linux-edac@vger.kernel.org>,
	<Smita.KoralahalliChannabasappa@amd.com>,
	Yazen Ghannam <yazen.ghannam@amd.com>
Subject: [PATCH v2 10/16] x86/mce: Separate global and per-CPU quirks
Date: Thu, 13 Feb 2025 16:45:59 +0000	[thread overview]
Message-ID: <20250213-wip-mca-updates-v2-10-3636547fe05f@amd.com> (raw)
In-Reply-To: <20250213-wip-mca-updates-v2-0-3636547fe05f@amd.com>

Many quirks are global configuration settings and a handful apply to
each CPU.

Move the per-CPU quirks to vendor init to execute them on each online
CPU. Set the global quirks during BSP-only init so they're only executed
once and early.

Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---

Notes:
    v1->v2:
    * New in v2.

 arch/x86/kernel/cpu/mce/amd.c   | 23 +++++++++++++++++++++++
 arch/x86/kernel/cpu/mce/core.c  | 36 ++----------------------------------
 arch/x86/kernel/cpu/mce/intel.c | 15 +++++++++++++++
 3 files changed, 40 insertions(+), 34 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index bf2b1dc5aaa9..c6510415159f 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -649,6 +649,28 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
 		wrmsrl(MSR_K7_HWCR, hwcr);
 }
 
+static void amd_apply_quirks(struct cpuinfo_x86 *c)
+{
+	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+
+	/* This should be disabled by the BIOS, but isn't always */
+	if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
+		/*
+		 * disable GART TBL walk error reporting, which
+		 * trips off incorrectly with the IOMMU & 3ware
+		 * & Cerberus:
+		 */
+		clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
+	}
+
+	/*
+	 * Various K7s with broken bank 0 around. Always disable
+	 * by default.
+	 */
+	if (c->x86 == 6 && this_cpu_read(mce_num_banks))
+		mce_banks[0].ctl = 0;
+}
+
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
@@ -656,6 +678,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 	u32 low = 0, high = 0, address = 0;
 	int offset = -1;
 
+	amd_apply_quirks(c);
 	mce_flags.amd_threshold	 = 1;
 
 	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 38db802acde4..1ea52f6259a4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1879,18 +1879,6 @@ static void __mcheck_cpu_init_prepare_banks(void)
 
 static void apply_quirks_amd(struct cpuinfo_x86 *c)
 {
-	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
-	/* This should be disabled by the BIOS, but isn't always */
-	if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
-		/*
-		 * disable GART TBL walk error reporting, which
-		 * trips off incorrectly with the IOMMU & 3ware
-		 * & Cerberus:
-		 */
-		clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
-	}
-
 	if (c->x86 < 0x11 && mca_cfg.bootlog < 0) {
 		/*
 		 * Lots of broken BIOS around that don't clear them
@@ -1899,13 +1887,6 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
 		mca_cfg.bootlog = 0;
 	}
 
-	/*
-	 * Various K7s with broken bank 0 around. Always disable
-	 * by default.
-	 */
-	if (c->x86 == 6 && this_cpu_read(mce_num_banks))
-		mce_banks[0].ctl = 0;
-
 	/*
 	 * overflow_recov is supported for F15h Models 00h-0fh
 	 * even though we don't have a CPUID bit for it.
@@ -1919,23 +1900,10 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
 
 static void apply_quirks_intel(struct cpuinfo_x86 *c)
 {
-	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
 	/* Older CPUs (prior to family 6) don't need quirks. */
 	if (c->x86_vfm < INTEL_PENTIUM_PRO)
 		return;
 
-	/*
-	 * SDM documents that on family 6 bank 0 should not be written
-	 * because it aliases to another special BIOS controlled
-	 * register.
-	 * But it's not aliased anymore on model 0x1a+
-	 * Don't ignore bank 0 completely because there could be a
-	 * valid event later, merely don't write CTL0.
-	 */
-	if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
-		mce_banks[0].init = false;
-
 	/*
 	 * All newer Intel systems support MCE broadcasting. Enable
 	 * synchronization with a one second timeout.
@@ -2255,6 +2223,8 @@ void cpu_mca_init(struct cpuinfo_x86 *c)
 
 	if (cap & MCG_SER_P)
 		mca_cfg.ser = 1;
+
+	__mcheck_cpu_apply_quirks(c);
 }
 
 /*
@@ -2274,8 +2244,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 
 	__mcheck_cpu_cap_init();
 
-	__mcheck_cpu_apply_quirks(c);
-
 	if (!mce_gen_pool_init()) {
 		mca_cfg.disabled = 1;
 		pr_emerg("Couldn't allocate MCE records pool!\n");
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index f863df0ff42c..1a7aaee14991 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -468,8 +468,23 @@ static void intel_imc_init(struct cpuinfo_x86 *c)
 	}
 }
 
+static void intel_apply_quirks(struct cpuinfo_x86 *c)
+{
+	/*
+	 * SDM documents that on family 6 bank 0 should not be written
+	 * because it aliases to another special BIOS controlled
+	 * register.
+	 * But it's not aliased anymore on model 0x1a+
+	 * Don't ignore bank 0 completely because there could be a
+	 * valid event later, merely don't write CTL0.
+	 */
+	if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
+		this_cpu_ptr(mce_banks_array)[0].init = false;
+}
+
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
+	intel_apply_quirks(c);
 	intel_init_cmci();
 	intel_init_lmce();
 	intel_imc_init(c);

-- 
2.43.0


  parent reply	other threads:[~2025-02-13 16:46 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-02-13 16:45 [PATCH v2 00/16] AMD MCA interrupts rework Yazen Ghannam
2025-02-13 16:45 ` [PATCH v2 01/16] x86/mce: Don't remove sysfs if thresholding sysfs init fails Yazen Ghannam
2025-02-17  6:58   ` Zhuo, Qiuxu
2025-02-13 16:45 ` [PATCH v2 02/16] x86/mce/amd: Remove return value for mce_threshold_create_device() Yazen Ghannam
2025-02-17  7:11   ` Zhuo, Qiuxu
2025-02-17 14:09     ` Yazen Ghannam
2025-02-13 16:45 ` [PATCH v2 03/16] x86/mce/amd: Remove smca_banks_map Yazen Ghannam
2025-02-17  7:57   ` Zhuo, Qiuxu
2025-02-17 14:17     ` Yazen Ghannam
2025-02-13 16:45 ` [PATCH v2 04/16] x86/mce/amd: Put list_head in threshold_bank Yazen Ghannam
2025-02-18  1:28   ` Zhuo, Qiuxu
2025-02-13 16:45 ` [PATCH v2 05/16] x86/mce: Cleanup bank processing on init Yazen Ghannam
2025-02-13 22:32   ` Luck, Tony
2025-02-17 13:55     ` Yazen Ghannam
2025-02-18 16:40       ` Luck, Tony
2025-02-18  2:15   ` Zhuo, Qiuxu
2025-02-13 16:45 ` [PATCH v2 06/16] x86/mce: Remove __mcheck_cpu_init_early() Yazen Ghannam
2025-02-18  3:00   ` Zhuo, Qiuxu
2025-02-19 15:53     ` Yazen Ghannam
2025-02-27 15:25   ` Borislav Petkov
2025-02-27 16:31     ` Yazen Ghannam
2025-02-27 19:33       ` Borislav Petkov
2025-02-27 19:59         ` Yazen Ghannam
2025-02-27 20:48           ` Borislav Petkov
2025-02-28 14:29             ` Yazen Ghannam
2025-02-13 16:45 ` [PATCH v2 07/16] x86/mce: Define BSP-only init Yazen Ghannam
2025-02-18  3:16   ` Zhuo, Qiuxu
2025-02-19 15:57     ` Yazen Ghannam
2025-02-20  1:37       ` Zhuo, Qiuxu
2025-02-20 14:36         ` Yazen Ghannam
2025-02-24 13:28           ` Zhuo, Qiuxu
2025-02-13 16:45 ` [PATCH v2 08/16] x86/mce: Define BSP-only SMCA init Yazen Ghannam
2025-02-18  3:33   ` Zhuo, Qiuxu
2025-02-19 16:01     ` Yazen Ghannam
2025-02-13 16:45 ` [PATCH v2 09/16] x86/mce: Do 'UNKNOWN' vendor check early Yazen Ghannam
2025-02-18  5:31   ` Zhuo, Qiuxu
2025-02-13 16:45 ` Yazen Ghannam [this message]
2025-02-18  6:03   ` [PATCH v2 10/16] x86/mce: Separate global and per-CPU quirks Zhuo, Qiuxu
2025-02-19 16:06     ` Yazen Ghannam
2025-02-20  1:27       ` Zhuo, Qiuxu
2025-02-20 14:37         ` Yazen Ghannam
2025-02-13 16:46 ` [PATCH v2 11/16] x86/mce: Move machine_check_poll() status checks to helper functions Yazen Ghannam
2025-02-18  6:29   ` Zhuo, Qiuxu
2025-02-13 16:46 ` [PATCH v2 12/16] x86/mce: Unify AMD THR handler with MCA Polling Yazen Ghannam
2025-02-18  6:42   ` Zhuo, Qiuxu
2025-02-19 16:07     ` Yazen Ghannam
2025-02-13 16:46 ` [PATCH v2 13/16] x86/mce: Unify AMD DFR " Yazen Ghannam
2025-02-18  7:37   ` Zhuo, Qiuxu
2025-02-19 16:09     ` Yazen Ghannam
2025-02-20  1:41       ` Zhuo, Qiuxu
2025-02-20 14:41         ` Yazen Ghannam
2025-02-24 13:31           ` Zhuo, Qiuxu
2025-02-13 16:46 ` [PATCH v2 14/16] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA systems Yazen Ghannam
2025-02-18  8:23   ` Zhuo, Qiuxu
2025-02-19 16:16     ` Yazen Ghannam
2025-02-13 16:46 ` [PATCH v2 15/16] x86/mce/amd: Support SMCA Corrected Error Interrupt Yazen Ghannam
2025-02-13 22:34   ` Luck, Tony
2025-02-17 14:06     ` Yazen Ghannam
2025-02-18 13:27   ` Zhuo, Qiuxu
2025-02-19 16:19     ` Yazen Ghannam
2025-02-13 16:46 ` [PATCH v2 16/16] x86/mce: Handle AMD threshold interrupt storms Yazen Ghannam
2025-02-18 13:51   ` Zhuo, Qiuxu
2025-02-13 22:40 ` [PATCH v2 00/16] AMD MCA interrupts rework Luck, Tony

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250213-wip-mca-updates-v2-10-3636547fe05f@amd.com \
    --to=yazen.ghannam@amd.com \
    --cc=Smita.KoralahalliChannabasappa@amd.com \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.