Re: [PATCH v7 2/8] x86/mce: Unify AMD DFR handler with MCA Polling

Linux EDAC development
 help / color / mirror / Atom feed

From: Yazen Ghannam <yazen.ghannam@amd.com>
To: Borislav Petkov <bp@alien8.de>
Cc: x86@kernel.org, Tony Luck <tony.luck@intel.com>,
	"Rafael J. Wysocki" <rafael@kernel.org>,
	Len Brown <lenb@kernel.org>,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org,
	Smita.KoralahalliChannabasappa@amd.com,
	Qiuxu Zhuo <qiuxu.zhuo@intel.com>,
	Nikolay Borisov <nik.borisov@suse.com>,
	Bert Karwatzki <spasswolf@web.de>,
	linux-acpi@vger.kernel.org
Subject: Re: [PATCH v7 2/8] x86/mce: Unify AMD DFR handler with MCA Polling
Date: Mon, 27 Oct 2025 09:35:42 -0400	[thread overview]
Message-ID: <20251027133542.GA8279@yaz-khff2.amd.com> (raw)
In-Reply-To: <20251025150304.GXaPzmqFawI0NrCC-0@fat_crate.local>

On Sat, Oct 25, 2025 at 05:03:04PM +0200, Borislav Petkov wrote:
> On Fri, Oct 24, 2025 at 11:27:23PM +0200, Borislav Petkov wrote:
> > On Fri, Oct 24, 2025 at 04:30:12PM -0400, Yazen Ghannam wrote:
> > > Should I send another revision?
> > 
> > Nah, I'm not done simplifying this yet. :-P
> 
> Yeah, no, looks ok now:
> 
> ---
> From: Yazen Ghannam <yazen.ghannam@amd.com>
> Date: Thu, 16 Oct 2025 16:37:47 +0000
> Subject: [PATCH] x86/mce: Unify AMD DFR handler with MCA Polling
> 
> AMD systems optionally support a deferred error interrupt. The interrupt
> should be used as another signal to trigger MCA polling. This is similar to
> how other MCA interrupts are handled.
> 
> Deferred errors do not require any special handling related to the interrupt,
> e.g. resetting or rearming the interrupt, etc.
> 
> However, Scalable MCA systems include a pair of registers, MCA_DESTAT and
> MCA_DEADDR, that should be checked for valid errors. This check should be done
> whenever MCA registers are polled. Currently, the deferred error interrupt
> does this check, but the MCA polling function does not.
> 
> Call the MCA polling function when handling the deferred error interrupt. This
> keeps all "polling" cases in a common function.
> 
> Add an SMCA status check helper. This will do the same status check and
> register clearing that the interrupt handler has done. And it extends the
> common polling flow to find AMD deferred errors.
> 
> Clear the MCA_DESTAT register at the end of the handler rather than the
> beginning. This maintains the procedure that the 'status' register must be
> cleared as the final step.
> 
>   [ bp: Zap commit message pieces explaining what the patch does;
>         zap unnecessary special-casing of deferred errors. ]
> 
> Signed-off-by: Yazen Ghannam <yazen.ghannam@amd.com>
> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
> Link: https://lore.kernel.org/all/20251016-wip-mca-updates-v7-0-5c139a4062cb@amd.com
> ---
>  arch/x86/include/asm/mce.h     |   6 ++
>  arch/x86/kernel/cpu/mce/amd.c  | 111 ++++-----------------------------
>  arch/x86/kernel/cpu/mce/core.c |  44 ++++++++++++-
>  3 files changed, 62 insertions(+), 99 deletions(-)
> 
> diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
> index 31e3cb550fb3..7d6588195d56 100644
> --- a/arch/x86/include/asm/mce.h
> +++ b/arch/x86/include/asm/mce.h
> @@ -165,6 +165,12 @@
>   */
>  #define MCE_IN_KERNEL_COPYIN	BIT_ULL(7)
>  
> +/*
> + * Indicates that handler should check and clear Deferred error registers
> + * rather than common ones.
> + */
> +#define MCE_CHECK_DFR_REGS	BIT_ULL(8)
> +
>  /*
>   * This structure contains all data related to the MCE log.  Also
>   * carries a signature to make it easier to find from external
> diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
> index ac6a98aa7bc2..d9f9ee7db5c8 100644
> --- a/arch/x86/kernel/cpu/mce/amd.c
> +++ b/arch/x86/kernel/cpu/mce/amd.c
> @@ -56,6 +56,7 @@ static bool thresholding_irq_en;
>  
>  struct mce_amd_cpu_data {
>  	mce_banks_t     thr_intr_banks;
> +	mce_banks_t     dfr_intr_banks;
>  };
>  
>  static DEFINE_PER_CPU_READ_MOSTLY(struct mce_amd_cpu_data, mce_amd_data);
> @@ -300,8 +301,10 @@ static void smca_configure(unsigned int bank, unsigned int cpu)
>  		 * APIC based interrupt. First, check that no interrupt has been
>  		 * set.
>  		 */
> -		if ((low & BIT(5)) && !((high >> 5) & 0x3))
> +		if ((low & BIT(5)) && !((high >> 5) & 0x3)) {
> +			__set_bit(bank, this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
>  			high |= BIT(5);
> +		}
>  
>  		this_cpu_ptr(mce_banks_array)[bank].lsb_in_status = !!(low & BIT(8));
>  
> @@ -792,37 +795,6 @@ bool amd_mce_usable_address(struct mce *m)
>  	return false;
>  }
>  
> -static void __log_error(unsigned int bank, u64 status, u64 addr, u64 misc)
> -{
> -	struct mce_hw_err err;
> -	struct mce *m = &err.m;
> -
> -	mce_prep_record(&err);
> -
> -	m->status = status;
> -	m->misc   = misc;
> -	m->bank   = bank;
> -	m->tsc	 = rdtsc();
> -
> -	if (m->status & MCI_STATUS_ADDRV) {
> -		m->addr = addr;
> -
> -		smca_extract_err_addr(m);
> -	}
> -
> -	if (mce_flags.smca) {
> -		rdmsrq(MSR_AMD64_SMCA_MCx_IPID(bank), m->ipid);
> -
> -		if (m->status & MCI_STATUS_SYNDV) {
> -			rdmsrq(MSR_AMD64_SMCA_MCx_SYND(bank), m->synd);
> -			rdmsrq(MSR_AMD64_SMCA_MCx_SYND1(bank), err.vendor.amd.synd1);
> -			rdmsrq(MSR_AMD64_SMCA_MCx_SYND2(bank), err.vendor.amd.synd2);
> -		}
> -	}
> -
> -	mce_log(&err);
> -}
> -
>  DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
>  {
>  	trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR);
> @@ -832,75 +804,10 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_deferred_error)
>  	apic_eoi();
>  }
>  
> -/*
> - * Returns true if the logged error is deferred. False, otherwise.
> - */
> -static inline bool
> -_log_error_bank(unsigned int bank, u32 msr_stat, u32 msr_addr, u64 misc)
> -{
> -	u64 status, addr = 0;
> -
> -	rdmsrq(msr_stat, status);
> -	if (!(status & MCI_STATUS_VAL))
> -		return false;
> -
> -	if (status & MCI_STATUS_ADDRV)
> -		rdmsrq(msr_addr, addr);
> -
> -	__log_error(bank, status, addr, misc);
> -
> -	wrmsrq(msr_stat, 0);
> -
> -	return status & MCI_STATUS_DEFERRED;
> -}
> -
> -static bool _log_error_deferred(unsigned int bank, u32 misc)
> -{
> -	if (!_log_error_bank(bank, mca_msr_reg(bank, MCA_STATUS),
> -			     mca_msr_reg(bank, MCA_ADDR), misc))
> -		return false;
> -
> -	/*
> -	 * Non-SMCA systems don't have MCA_DESTAT/MCA_DEADDR registers.
> -	 * Return true here to avoid accessing these registers.
> -	 */
> -	if (!mce_flags.smca)
> -		return true;
> -
> -	/* Clear MCA_DESTAT if the deferred error was logged from MCA_STATUS. */
> -	wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(bank), 0);
> -	return true;
> -}
> -
> -/*
> - * We have three scenarios for checking for Deferred errors:
> - *
> - * 1) Non-SMCA systems check MCA_STATUS and log error if found.
> - * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
> - *    clear MCA_DESTAT.
> - * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
> - *    log it.
> - */
> -static void log_error_deferred(unsigned int bank)
> -{
> -	if (_log_error_deferred(bank, 0))
> -		return;
> -
> -	/*
> -	 * Only deferred errors are logged in MCA_DE{STAT,ADDR} so just check
> -	 * for a valid error.
> -	 */
> -	_log_error_bank(bank, MSR_AMD64_SMCA_MCx_DESTAT(bank),
> -			      MSR_AMD64_SMCA_MCx_DEADDR(bank), 0);
> -}
> -
>  /* APIC interrupt handler for deferred errors */
>  static void amd_deferred_error_interrupt(void)
>  {
> -	unsigned int bank;
> -
> -	for (bank = 0; bank < this_cpu_read(mce_num_banks); ++bank)
> -		log_error_deferred(bank);
> +	machine_check_poll(MCP_TIMESTAMP, &this_cpu_ptr(&mce_amd_data)->dfr_intr_banks);
>  }
>  
>  static void reset_block(struct threshold_block *block)
> @@ -952,6 +859,14 @@ void amd_clear_bank(struct mce *m)
>  {
>  	amd_reset_thr_limit(m->bank);
>  
> +	/* Clear MCA_DESTAT for all deferred errors even those logged in MCA_STATUS. */
> +	if (m->status & MCI_STATUS_DEFERRED)
> +		mce_wrmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank), 0);
> +
> +	/* Don't clear MCA_STATUS if MCA_DESTAT was used exclusively. */
> +	if (m->kflags & MCE_CHECK_DFR_REGS)
> +		return;
> +
>  	mce_wrmsrq(mca_msr_reg(m->bank, MCA_STATUS), 0);
>  }
>  
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 460e90a1a0b1..7be062429ce3 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -687,7 +687,10 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
>  		m->misc = mce_rdmsrq(mca_msr_reg(i, MCA_MISC));
>  
>  	if (m->status & MCI_STATUS_ADDRV) {
> -		m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
> +		if (m->kflags & MCE_CHECK_DFR_REGS)
> +			m->addr = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DEADDR(i));
> +		else
> +			m->addr = mce_rdmsrq(mca_msr_reg(i, MCA_ADDR));
>  
>  		/*
>  		 * Mask the reported address by the reported granularity.
> @@ -714,6 +717,42 @@ static noinstr void mce_read_aux(struct mce_hw_err *err, int i)
>  
>  DEFINE_PER_CPU(unsigned, mce_poll_count);
>  
> +/*
> + * We have three scenarios for checking for Deferred errors:
> + *
> + * 1) Non-SMCA systems check MCA_STATUS and log error if found.
> + * 2) SMCA systems check MCA_STATUS. If error is found then log it and also
> + *    clear MCA_DESTAT.
> + * 3) SMCA systems check MCA_DESTAT, if error was not found in MCA_STATUS, and
> + *    log it.
> + */
> +static bool smca_should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
> +{
> +	struct mce *m = &err->m;
> +
> +	/*
> +	 * If the MCA_STATUS register has a deferred error, then continue using it as
> +	 * the status register.
> +	 *
> +	 * MCA_DESTAT will be cleared at the end of the handler.
> +	 */
> +	if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED))
> +		return true;
> +
> +	/*
> +	 * If the MCA_DESTAT register has a deferred error, then use it instead.
> +	 *
> +	 * MCA_STATUS will not be cleared at the end of the handler.
> +	 */
> +	m->status = mce_rdmsrq(MSR_AMD64_SMCA_MCx_DESTAT(m->bank));
> +	if ((m->status & MCI_STATUS_VAL) && (m->status & MCI_STATUS_DEFERRED)) {
> +		m->kflags |= MCE_CHECK_DFR_REGS;
> +		return true;
> +	}
> +
> +	return false;
> +}
> +

No, this still isn't right. Sorry, I had a brain freeze before.

This function returns true only for valid deferred errors. For any other
error, it returns false.

>  /*
>   * Newer Intel systems that support software error
>   * recovery need to make additional checks. Other
> @@ -740,6 +779,9 @@ static bool should_log_poll_error(enum mcp_flags flags, struct mce_hw_err *err)
>  {
>  	struct mce *m = &err->m;
>  
> +	if (mce_flags.smca)
> +		return smca_should_log_poll_error(flags, err);
> +

With this early return, polling on SMCA systems will never find corrected
errors or uncorrected (non-deferred) errors. That's one of the reasons to
add the MCP_DFR flag.

Otherwise, we'd need to include some of the same checks from below.

>  	/* If this entry is not valid, ignore it. */
>  	if (!(m->status & MCI_STATUS_VAL))
>  		return false;
> -- 

Thanks,
Yazen

next prev parent reply	other threads:[~2025-10-27 13:35 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-16 16:37 [PATCH v7 0/8] AMD MCA interrupts rework Yazen Ghannam
2025-10-16 16:37 ` [PATCH v7 1/8] x86/mce: Unify AMD THR handler with MCA Polling Yazen Ghannam
2025-10-16 16:37 ` [PATCH v7 2/8] x86/mce: Unify AMD DFR " Yazen Ghannam
2025-10-24 15:03   ` Borislav Petkov
2025-10-24 20:30     ` Yazen Ghannam
2025-10-24 21:27       ` Borislav Petkov
2025-10-25 15:03         ` Borislav Petkov
2025-10-27 13:35           ` Yazen Ghannam [this message]
2025-10-27 14:11             ` Yazen Ghannam
2025-10-28 15:22               ` Borislav Petkov
2025-10-28 15:42                 ` Yazen Ghannam
2025-10-28 17:46                   ` Borislav Petkov
2025-10-28 20:37                     ` Yazen Ghannam
2025-10-28 23:18                       ` Borislav Petkov
2025-10-29 15:09                         ` Yazen Ghannam
2025-10-29 16:02                           ` Borislav Petkov
2025-10-16 16:37 ` [PATCH v7 3/8] x86/mce/amd: Enable interrupt vectors once per-CPU on SMCA systems Yazen Ghannam
2025-10-16 16:37 ` [PATCH v7 4/8] x86/mce/amd: Support SMCA Corrected Error Interrupt Yazen Ghannam
2025-10-16 16:37 ` [PATCH v7 5/8] x86/mce/amd: Remove redundant reset_block() Yazen Ghannam
2025-10-16 16:37 ` [PATCH v7 6/8] x86/mce/amd: Define threshold restart function for banks Yazen Ghannam
2025-10-16 16:37 ` [PATCH v7 7/8] x86/mce: Handle AMD threshold interrupt storms Yazen Ghannam
2025-10-16 16:37 ` [PATCH v7 8/8] x86/mce: Save and use APEI corrected threshold limit Yazen Ghannam
2025-11-02 12:32   ` Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20251027133542.GA8279@yaz-khff2.amd.com \
    --to=yazen.ghannam@amd.com \
    --cc=Smita.KoralahalliChannabasappa@amd.com \
    --cc=bp@alien8.de \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=nik.borisov@suse.com \
    --cc=qiuxu.zhuo@intel.com \
    --cc=rafael@kernel.org \
    --cc=spasswolf@web.de \
    --cc=tony.luck@intel.com \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox