linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Yazen Ghannam <yazen.ghannam@amd.com>
To: <x86@kernel.org>, Tony Luck <tony.luck@intel.com>,
	"Rafael J. Wysocki" <rafael@kernel.org>
Cc: <linux-kernel@vger.kernel.org>, <linux-edac@vger.kernel.org>,
	<Smita.KoralahalliChannabasappa@amd.com>,
	Qiuxu Zhuo <qiuxu.zhuo@intel.com>,
	Nikolay Borisov <nik.borisov@suse.com>,
	<linux-acpi@vger.kernel.org>,
	"Yazen Ghannam" <yazen.ghannam@amd.com>
Subject: [PATCH v5 11/20] x86/mce: Separate global and per-CPU quirks
Date: Mon, 25 Aug 2025 17:33:08 +0000	[thread overview]
Message-ID: <20250825-wip-mca-updates-v5-11-865768a2eef8@amd.com> (raw)
In-Reply-To: <20250825-wip-mca-updates-v5-0-865768a2eef8@amd.com>

Many quirks are global configuration settings and a handful apply to
each CPU.

Move the per-CPU quirks to vendor init to execute them on each online
CPU. Set the global quirks during BSP-only init so they're only executed
once and early.

Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---

Notes:
    Link:
    https://lore.kernel.org/r/20250624-wip-mca-updates-v4-14-236dd74f645f@amd.com
    
    v4->v5:
    * Apply consistent naming to quirk functions.
    
    v3->v4:
    * Add newline in mce_amd_feature_init().
    * Remove __mcheck_cpu_apply_quirks().
    * Update code comment ref. __mcheck_cpu_apply_quirks().
    
    v2->v3:
    * Update code comment.
    * Add tags from Qiuxu and Tony.
    
    v1->v2:
    * New in v2.

 arch/x86/kernel/cpu/mce/amd.c   | 24 ++++++++++++
 arch/x86/kernel/cpu/mce/core.c  | 85 +++++++++++------------------------------
 arch/x86/kernel/cpu/mce/intel.c | 18 +++++++++
 3 files changed, 65 insertions(+), 62 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index efcfce329ca7..42f5c115395b 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -650,6 +650,28 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
 		wrmsrq(MSR_K7_HWCR, hwcr);
 }
 
+static void amd_apply_cpu_quirks(struct cpuinfo_x86 *c)
+{
+	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+
+	/* This should be disabled by the BIOS, but isn't always */
+	if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
+		/*
+		 * disable GART TBL walk error reporting, which
+		 * trips off incorrectly with the IOMMU & 3ware
+		 * & Cerberus:
+		 */
+		clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
+	}
+
+	/*
+	 * Various K7s with broken bank 0 around. Always disable
+	 * by default.
+	 */
+	if (c->x86 == 6 && this_cpu_read(mce_num_banks))
+		mce_banks[0].ctl = 0;
+}
+
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
@@ -657,6 +679,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 	u32 low = 0, high = 0, address = 0;
 	int offset = -1;
 
+	amd_apply_cpu_quirks(c);
+
 	mce_flags.amd_threshold	 = 1;
 
 	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 14456f6c2f7b..21a5ea239e93 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1807,8 +1807,9 @@ static void __mcheck_cpu_mce_banks_init(void)
 		struct mce_bank *b = &mce_banks[i];
 
 		/*
-		 * Init them all, __mcheck_cpu_apply_quirks() is going to apply
-		 * the required vendor quirks before
+		 * Init them all by default.
+		 *
+		 * The required vendor quirks will be applied before
 		 * __mcheck_cpu_init_prepare_banks() does the final bank setup.
 		 */
 		b->ctl = -1ULL;
@@ -1882,20 +1883,8 @@ static void __mcheck_cpu_init_prepare_banks(void)
 	}
 }
 
-static void apply_quirks_amd(struct cpuinfo_x86 *c)
+static void amd_apply_global_quirks(struct cpuinfo_x86 *c)
 {
-	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
-	/* This should be disabled by the BIOS, but isn't always */
-	if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
-		/*
-		 * disable GART TBL walk error reporting, which
-		 * trips off incorrectly with the IOMMU & 3ware
-		 * & Cerberus:
-		 */
-		clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
-	}
-
 	if (c->x86 < 0x11 && mca_cfg.bootlog < 0) {
 		/*
 		 * Lots of broken BIOS around that don't clear them
@@ -1904,13 +1893,6 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
 		mca_cfg.bootlog = 0;
 	}
 
-	/*
-	 * Various K7s with broken bank 0 around. Always disable
-	 * by default.
-	 */
-	if (c->x86 == 6 && this_cpu_read(mce_num_banks))
-		mce_banks[0].ctl = 0;
-
 	/*
 	 * overflow_recov is supported for F15h Models 00h-0fh
 	 * even though we don't have a CPUID bit for it.
@@ -1922,25 +1904,12 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
 		mce_flags.zen_ifu_quirk = 1;
 }
 
-static void apply_quirks_intel(struct cpuinfo_x86 *c)
+static void intel_apply_global_quirks(struct cpuinfo_x86 *c)
 {
-	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
 	/* Older CPUs (prior to family 6) don't need quirks. */
 	if (c->x86_vfm < INTEL_PENTIUM_PRO)
 		return;
 
-	/*
-	 * SDM documents that on family 6 bank 0 should not be written
-	 * because it aliases to another special BIOS controlled
-	 * register.
-	 * But it's not aliased anymore on model 0x1a+
-	 * Don't ignore bank 0 completely because there could be a
-	 * valid event later, merely don't write CTL0.
-	 */
-	if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
-		mce_banks[0].init = false;
-
 	/*
 	 * All newer Intel systems support MCE broadcasting. Enable
 	 * synchronization with a one second timeout.
@@ -1966,7 +1935,7 @@ static void apply_quirks_intel(struct cpuinfo_x86 *c)
 		mce_flags.skx_repmov_quirk = 1;
 }
 
-static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
+static void zhaoxin_apply_global_quirks(struct cpuinfo_x86 *c)
 {
 	/*
 	 * All newer Zhaoxin CPUs support MCE broadcasting. Enable
@@ -1978,29 +1947,6 @@ static void apply_quirks_zhaoxin(struct cpuinfo_x86 *c)
 	}
 }
 
-/* Add per CPU specific workarounds here */
-static void __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
-{
-	struct mca_config *cfg = &mca_cfg;
-
-	switch (c->x86_vendor) {
-	case X86_VENDOR_AMD:
-		apply_quirks_amd(c);
-		break;
-	case X86_VENDOR_INTEL:
-		apply_quirks_intel(c);
-		break;
-	case X86_VENDOR_ZHAOXIN:
-		apply_quirks_zhaoxin(c);
-		break;
-	}
-
-	if (cfg->monarch_timeout < 0)
-		cfg->monarch_timeout = 0;
-	if (cfg->bootlog != 0)
-		cfg->panic_timeout = 30;
-}
-
 static bool __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
 {
 	if (c->x86 != 5)
@@ -2258,6 +2204,23 @@ void mca_bsp_init(struct cpuinfo_x86 *c)
 
 	if (cap & MCG_SER_P)
 		mca_cfg.ser = 1;
+
+	switch (c->x86_vendor) {
+	case X86_VENDOR_AMD:
+		amd_apply_global_quirks(c);
+		break;
+	case X86_VENDOR_INTEL:
+		intel_apply_global_quirks(c);
+		break;
+	case X86_VENDOR_ZHAOXIN:
+		zhaoxin_apply_global_quirks(c);
+		break;
+	}
+
+	if (mca_cfg.monarch_timeout < 0)
+		mca_cfg.monarch_timeout = 0;
+	if (mca_cfg.bootlog != 0)
+		mca_cfg.panic_timeout = 30;
 }
 
 /*
@@ -2277,8 +2240,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
 
 	__mcheck_cpu_cap_init();
 
-	__mcheck_cpu_apply_quirks(c);
-
 	if (!mce_gen_pool_init()) {
 		mca_cfg.disabled = 1;
 		pr_emerg("Couldn't allocate MCE records pool!\n");
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 9b149b9c4109..4655223ba560 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -468,8 +468,26 @@ static void intel_imc_init(struct cpuinfo_x86 *c)
 	}
 }
 
+static void intel_apply_cpu_quirks(struct cpuinfo_x86 *c)
+{
+	/*
+	 * SDM documents that on family 6 bank 0 should not be written
+	 * because it aliases to another special BIOS controlled
+	 * register.
+	 * But it's not aliased anymore on model 0x1a+
+	 * Don't ignore bank 0 completely because there could be a
+	 * valid event later, merely don't write CTL0.
+	 *
+	 * Older CPUs (prior to family 6) can't reach this point and already
+	 * return early due to the check of __mcheck_cpu_ancient_init().
+	 */
+	if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
+		this_cpu_ptr(mce_banks_array)[0].init = false;
+}
+
 void mce_intel_feature_init(struct cpuinfo_x86 *c)
 {
+	intel_apply_cpu_quirks(c);
 	intel_init_cmci();
 	intel_init_lmce();
 	intel_imc_init(c);

-- 
2.51.0


  parent reply	other threads:[~2025-08-25 17:33 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-25 17:32 [PATCH v5 00/20] AMD MCA interrupts rework Yazen Ghannam
2025-08-25 17:32 ` [PATCH v5 01/20] x86/mce/amd: Rename threshold restart function Yazen Ghannam
2025-08-25 17:32 ` [PATCH v5 02/20] x86/mce/amd: Remove return value for mce_threshold_{create,remove}_device() Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 03/20] x86/mce/amd: Remove smca_banks_map Yazen Ghannam
2025-08-25 18:19   ` Borislav Petkov
2025-08-25 19:54     ` Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 04/20] x86/mce/amd: Put list_head in threshold_bank Yazen Ghannam
2025-09-01 15:41   ` Nikolay Borisov
2025-09-01 16:41     ` Borislav Petkov
2025-08-25 17:33 ` [PATCH v5 05/20] x86/mce: Cleanup bank processing on init Yazen Ghannam
2025-08-26 12:35   ` Borislav Petkov
2025-08-26 13:47     ` Yazen Ghannam
2025-08-26 14:33       ` Borislav Petkov
2025-08-25 17:33 ` [PATCH v5 06/20] x86/mce: Remove __mcheck_cpu_init_early() Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 07/20] x86/mce: Reorder __mcheck_cpu_init_generic() call Yazen Ghannam
2025-09-01 17:07   ` Borislav Petkov
2025-09-02 13:30     ` Yazen Ghannam
2025-09-02 16:26       ` Borislav Petkov
2025-09-02 17:14         ` Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 08/20] x86/mce: Define BSP-only init Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 09/20] x86/mce: Define BSP-only SMCA init Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 10/20] x86/mce: Do 'UNKNOWN' vendor check early Yazen Ghannam
2025-08-25 17:33 ` Yazen Ghannam [this message]
2025-08-25 17:33 ` [PATCH v5 12/20] x86/mce: Move machine_check_poll() status checks to helper functions Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 13/20] x86/mce: Unify AMD THR handler with MCA Polling Yazen Ghannam
2025-09-02 11:10   ` Borislav Petkov
2025-09-02 13:37     ` Yazen Ghannam
2025-09-02 17:04       ` Borislav Petkov
2025-09-02 17:25         ` Yazen Ghannam
2025-09-03  9:48           ` Borislav Petkov
2025-08-25 17:33 ` [PATCH v5 14/20] x86/mce: Unify AMD DFR " Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 15/20] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA systems Yazen Ghannam
2025-09-03 10:03   ` Borislav Petkov
2025-09-03 14:00     ` Yazen Ghannam
2025-09-03 15:39       ` Borislav Petkov
2025-08-25 17:33 ` [PATCH v5 16/20] x86/mce/amd: Support SMCA Corrected Error Interrupt Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 17/20] x86/mce/amd: Remove redundant reset_block() Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 18/20] x86/mce/amd: Define threshold restart function for banks Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 19/20] x86/mce: Handle AMD threshold interrupt storms Yazen Ghannam
2025-08-25 17:33 ` [PATCH v5 20/20] x86/mce: Save and use APEI corrected threshold limit Yazen Ghannam

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250825-wip-mca-updates-v5-11-865768a2eef8@amd.com \
    --to=yazen.ghannam@amd.com \
    --cc=Smita.KoralahalliChannabasappa@amd.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nik.borisov@suse.com \
    --cc=qiuxu.zhuo@intel.com \
    --cc=rafael@kernel.org \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).