From: Yazen Ghannam <yazen.ghannam@amd.com>
To: <x86@kernel.org>, Tony Luck <tony.luck@intel.com>
Cc: <linux-kernel@vger.kernel.org>, <linux-edac@vger.kernel.org>,
<Smita.KoralahalliChannabasappa@amd.com>,
Yazen Ghannam <yazen.ghannam@amd.com>
Subject: [PATCH v2 10/16] x86/mce: Separate global and per-CPU quirks
Date: Thu, 13 Feb 2025 16:45:59 +0000 [thread overview]
Message-ID: <20250213-wip-mca-updates-v2-10-3636547fe05f@amd.com> (raw)
In-Reply-To: <20250213-wip-mca-updates-v2-0-3636547fe05f@amd.com>
Many quirks are global configuration settings and a handful apply to
each CPU.
Move the per-CPU quirks to vendor init to execute them on each online
CPU. Set the global quirks during BSP-only init so they're only executed
once and early.
Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
---
Notes:
v1->v2:
* New in v2.
arch/x86/kernel/cpu/mce/amd.c | 23 +++++++++++++++++++++++
arch/x86/kernel/cpu/mce/core.c | 36 ++----------------------------------
arch/x86/kernel/cpu/mce/intel.c | 15 +++++++++++++++
3 files changed, 40 insertions(+), 34 deletions(-)
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index bf2b1dc5aaa9..c6510415159f 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -649,6 +649,28 @@ static void disable_err_thresholding(struct cpuinfo_x86 *c, unsigned int bank)
wrmsrl(MSR_K7_HWCR, hwcr);
}
+static void amd_apply_quirks(struct cpuinfo_x86 *c)
+{
+ struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
+
+ /* This should be disabled by the BIOS, but isn't always */
+ if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
+ /*
+ * disable GART TBL walk error reporting, which
+ * trips off incorrectly with the IOMMU & 3ware
+ * & Cerberus:
+ */
+ clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
+ }
+
+ /*
+ * Various K7s with broken bank 0 around. Always disable
+ * by default.
+ */
+ if (c->x86 == 6 && this_cpu_read(mce_num_banks))
+ mce_banks[0].ctl = 0;
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -656,6 +678,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
u32 low = 0, high = 0, address = 0;
int offset = -1;
+ amd_apply_quirks(c);
mce_flags.amd_threshold = 1;
for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank) {
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 38db802acde4..1ea52f6259a4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1879,18 +1879,6 @@ static void __mcheck_cpu_init_prepare_banks(void)
static void apply_quirks_amd(struct cpuinfo_x86 *c)
{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
- /* This should be disabled by the BIOS, but isn't always */
- if (c->x86 == 15 && this_cpu_read(mce_num_banks) > 4) {
- /*
- * disable GART TBL walk error reporting, which
- * trips off incorrectly with the IOMMU & 3ware
- * & Cerberus:
- */
- clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
- }
-
if (c->x86 < 0x11 && mca_cfg.bootlog < 0) {
/*
* Lots of broken BIOS around that don't clear them
@@ -1899,13 +1887,6 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
mca_cfg.bootlog = 0;
}
- /*
- * Various K7s with broken bank 0 around. Always disable
- * by default.
- */
- if (c->x86 == 6 && this_cpu_read(mce_num_banks))
- mce_banks[0].ctl = 0;
-
/*
* overflow_recov is supported for F15h Models 00h-0fh
* even though we don't have a CPUID bit for it.
@@ -1919,23 +1900,10 @@ static void apply_quirks_amd(struct cpuinfo_x86 *c)
static void apply_quirks_intel(struct cpuinfo_x86 *c)
{
- struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
-
/* Older CPUs (prior to family 6) don't need quirks. */
if (c->x86_vfm < INTEL_PENTIUM_PRO)
return;
- /*
- * SDM documents that on family 6 bank 0 should not be written
- * because it aliases to another special BIOS controlled
- * register.
- * But it's not aliased anymore on model 0x1a+
- * Don't ignore bank 0 completely because there could be a
- * valid event later, merely don't write CTL0.
- */
- if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
- mce_banks[0].init = false;
-
/*
* All newer Intel systems support MCE broadcasting. Enable
* synchronization with a one second timeout.
@@ -2255,6 +2223,8 @@ void cpu_mca_init(struct cpuinfo_x86 *c)
if (cap & MCG_SER_P)
mca_cfg.ser = 1;
+
+ __mcheck_cpu_apply_quirks(c);
}
/*
@@ -2274,8 +2244,6 @@ void mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_cap_init();
- __mcheck_cpu_apply_quirks(c);
-
if (!mce_gen_pool_init()) {
mca_cfg.disabled = 1;
pr_emerg("Couldn't allocate MCE records pool!\n");
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index f863df0ff42c..1a7aaee14991 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -468,8 +468,23 @@ static void intel_imc_init(struct cpuinfo_x86 *c)
}
}
+static void intel_apply_quirks(struct cpuinfo_x86 *c)
+{
+ /*
+ * SDM documents that on family 6 bank 0 should not be written
+ * because it aliases to another special BIOS controlled
+ * register.
+ * But it's not aliased anymore on model 0x1a+
+ * Don't ignore bank 0 completely because there could be a
+ * valid event later, merely don't write CTL0.
+ */
+ if (c->x86_vfm < INTEL_NEHALEM_EP && this_cpu_read(mce_num_banks))
+ this_cpu_ptr(mce_banks_array)[0].init = false;
+}
+
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
+ intel_apply_quirks(c);
intel_init_cmci();
intel_init_lmce();
intel_imc_init(c);
--
2.43.0
next prev parent reply other threads:[~2025-02-13 16:46 UTC|newest]
Thread overview: 63+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-13 16:45 [PATCH v2 00/16] AMD MCA interrupts rework Yazen Ghannam
2025-02-13 16:45 ` [PATCH v2 01/16] x86/mce: Don't remove sysfs if thresholding sysfs init fails Yazen Ghannam
2025-02-17 6:58 ` Zhuo, Qiuxu
2025-02-13 16:45 ` [PATCH v2 02/16] x86/mce/amd: Remove return value for mce_threshold_create_device() Yazen Ghannam
2025-02-17 7:11 ` Zhuo, Qiuxu
2025-02-17 14:09 ` Yazen Ghannam
2025-02-13 16:45 ` [PATCH v2 03/16] x86/mce/amd: Remove smca_banks_map Yazen Ghannam
2025-02-17 7:57 ` Zhuo, Qiuxu
2025-02-17 14:17 ` Yazen Ghannam
2025-02-13 16:45 ` [PATCH v2 04/16] x86/mce/amd: Put list_head in threshold_bank Yazen Ghannam
2025-02-18 1:28 ` Zhuo, Qiuxu
2025-02-13 16:45 ` [PATCH v2 05/16] x86/mce: Cleanup bank processing on init Yazen Ghannam
2025-02-13 22:32 ` Luck, Tony
2025-02-17 13:55 ` Yazen Ghannam
2025-02-18 16:40 ` Luck, Tony
2025-02-18 2:15 ` Zhuo, Qiuxu
2025-02-13 16:45 ` [PATCH v2 06/16] x86/mce: Remove __mcheck_cpu_init_early() Yazen Ghannam
2025-02-18 3:00 ` Zhuo, Qiuxu
2025-02-19 15:53 ` Yazen Ghannam
2025-02-27 15:25 ` Borislav Petkov
2025-02-27 16:31 ` Yazen Ghannam
2025-02-27 19:33 ` Borislav Petkov
2025-02-27 19:59 ` Yazen Ghannam
2025-02-27 20:48 ` Borislav Petkov
2025-02-28 14:29 ` Yazen Ghannam
2025-02-13 16:45 ` [PATCH v2 07/16] x86/mce: Define BSP-only init Yazen Ghannam
2025-02-18 3:16 ` Zhuo, Qiuxu
2025-02-19 15:57 ` Yazen Ghannam
2025-02-20 1:37 ` Zhuo, Qiuxu
2025-02-20 14:36 ` Yazen Ghannam
2025-02-24 13:28 ` Zhuo, Qiuxu
2025-02-13 16:45 ` [PATCH v2 08/16] x86/mce: Define BSP-only SMCA init Yazen Ghannam
2025-02-18 3:33 ` Zhuo, Qiuxu
2025-02-19 16:01 ` Yazen Ghannam
2025-02-13 16:45 ` [PATCH v2 09/16] x86/mce: Do 'UNKNOWN' vendor check early Yazen Ghannam
2025-02-18 5:31 ` Zhuo, Qiuxu
2025-02-13 16:45 ` Yazen Ghannam [this message]
2025-02-18 6:03 ` [PATCH v2 10/16] x86/mce: Separate global and per-CPU quirks Zhuo, Qiuxu
2025-02-19 16:06 ` Yazen Ghannam
2025-02-20 1:27 ` Zhuo, Qiuxu
2025-02-20 14:37 ` Yazen Ghannam
2025-02-13 16:46 ` [PATCH v2 11/16] x86/mce: Move machine_check_poll() status checks to helper functions Yazen Ghannam
2025-02-18 6:29 ` Zhuo, Qiuxu
2025-02-13 16:46 ` [PATCH v2 12/16] x86/mce: Unify AMD THR handler with MCA Polling Yazen Ghannam
2025-02-18 6:42 ` Zhuo, Qiuxu
2025-02-19 16:07 ` Yazen Ghannam
2025-02-13 16:46 ` [PATCH v2 13/16] x86/mce: Unify AMD DFR " Yazen Ghannam
2025-02-18 7:37 ` Zhuo, Qiuxu
2025-02-19 16:09 ` Yazen Ghannam
2025-02-20 1:41 ` Zhuo, Qiuxu
2025-02-20 14:41 ` Yazen Ghannam
2025-02-24 13:31 ` Zhuo, Qiuxu
2025-02-13 16:46 ` [PATCH v2 14/16] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA systems Yazen Ghannam
2025-02-18 8:23 ` Zhuo, Qiuxu
2025-02-19 16:16 ` Yazen Ghannam
2025-02-13 16:46 ` [PATCH v2 15/16] x86/mce/amd: Support SMCA Corrected Error Interrupt Yazen Ghannam
2025-02-13 22:34 ` Luck, Tony
2025-02-17 14:06 ` Yazen Ghannam
2025-02-18 13:27 ` Zhuo, Qiuxu
2025-02-19 16:19 ` Yazen Ghannam
2025-02-13 16:46 ` [PATCH v2 16/16] x86/mce: Handle AMD threshold interrupt storms Yazen Ghannam
2025-02-18 13:51 ` Zhuo, Qiuxu
2025-02-13 22:40 ` [PATCH v2 00/16] AMD MCA interrupts rework Luck, Tony
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250213-wip-mca-updates-v2-10-3636547fe05f@amd.com \
--to=yazen.ghannam@amd.com \
--cc=Smita.KoralahalliChannabasappa@amd.com \
--cc=linux-edac@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tony.luck@intel.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox