[PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
@ 2026-01-12  8:27 lirongqing
  2026-01-12  8:56 ` Nikolay Borisov
  0 siblings, 1 reply; 55+ messages in thread
From: lirongqing @ 2026-01-12  8:27 UTC (permalink / raw)
  To: Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86,
	H . Peter Anvin, Tony Luck, Yazen Ghannam, Nikolay Borisov,
	Qiuxu Zhuo, Avadhut Naik, linux-kernel, linux-edac
  Cc: Li RongQing

From: Li RongQing <lirongqing@baidu.com>

Since commit 011d82611172 ("RAS: Add a Corrected Errors Collector"),
mce_notify_irq() in should_enable_timer() always returns false even
when an MCE event is logged, because bit 0 of mce_need_notify is not
set in mce_log. This prevents the timer interval from being properly
adjusted.

Fix this by modifying machine_check_poll() to return a boolean indicating
whether an MCE was logged, and update mc_poll_banks() to propagate this
return value. The timer interval logic in mce_timer_fn() now uses this
return value directly instead of relying on mce_notify_irq().

Fixes: 011d82611172 ("RAS: Add a Corrected Errors Collector")
Signed-off-by: Li RongQing <lirongqing@baidu.com>
---
 arch/x86/include/asm/mce.h         |  2 +-
 arch/x86/kernel/cpu/mce/core.c     | 17 +++++++++++------
 arch/x86/kernel/cpu/mce/intel.c    |  8 ++++++--
 arch/x86/kernel/cpu/mce/internal.h |  2 +-
 4 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 2d98886..fb9eab4 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -303,7 +303,7 @@ enum mcp_flags {
 	MCP_QUEUE_LOG	= BIT(2),	/* only queue to genpool */
 };
 
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
 
 DECLARE_PER_CPU(struct mce, injectm);
 
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 3444002..8d42691 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -813,10 +813,11 @@ static void clear_bank(struct mce *m)
  * is already totally * confused. In this case it's likely it will
  * not fully execute the machine check handler either.
  */
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
+bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 {
 	struct mce_bank *mce_banks = this_cpu_ptr(mce_banks_array);
 	struct mce_hw_err err;
+	bool logged = false;
 	struct mce *m;
 	int i;
 
@@ -868,6 +869,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		else
 			mce_log(&err);
 
+		logged = true;
 clear_it:
 		clear_bank(m);
 	}
@@ -878,6 +880,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 	 */
 
 	sync_core();
+
+	return logged;
 }
 EXPORT_SYMBOL_GPL(machine_check_poll);
 
@@ -1776,12 +1780,12 @@ static void __start_timer(struct timer_list *t, unsigned long interval)
 	local_irq_restore(flags);
 }
 
-static void mc_poll_banks_default(void)
+static bool mc_poll_banks_default(void)
 {
-	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+	return machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
 }
 
-void (*mc_poll_banks)(void) = mc_poll_banks_default;
+bool (*mc_poll_banks)(void) = mc_poll_banks_default;
 
 static bool should_enable_timer(unsigned long iv)
 {
@@ -1792,19 +1796,20 @@ static void mce_timer_fn(struct timer_list *t)
 {
 	struct timer_list *cpu_t = this_cpu_ptr(&mce_timer);
 	unsigned long iv;
+	bool logged = false;
 
 	WARN_ON(cpu_t != t);
 
 	iv = __this_cpu_read(mce_next_interval);
 
 	if (mce_available(this_cpu_ptr(&cpu_info)))
-		mc_poll_banks();
+		logged = mc_poll_banks();
 
 	/*
 	 * Alert userspace if needed. If we logged an MCE, reduce the polling
 	 * interval, otherwise increase the polling interval.
 	 */
-	if (mce_notify_irq())
+	if (logged)
 		iv = max(iv / 2, (unsigned long) HZ/100);
 	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c
index 4655223..a3d2730 100644
--- a/arch/x86/kernel/cpu/mce/intel.c
+++ b/arch/x86/kernel/cpu/mce/intel.c
@@ -395,11 +395,15 @@ void cmci_disable_bank(int bank)
 }
 
 /* Bank polling function when CMCI is disabled. */
-static void cmci_mc_poll_banks(void)
+static bool cmci_mc_poll_banks(void)
 {
+	bool logged;
+
 	spin_lock(&cmci_poll_lock);
-	machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
+	logged = machine_check_poll(0, this_cpu_ptr(&mce_poll_banks));
 	spin_unlock(&cmci_poll_lock);
+
+	return logged;
 }
 
 void intel_init_cmci(void)
diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h
index a31cf98..7bf2360 100644
--- a/arch/x86/kernel/cpu/mce/internal.h
+++ b/arch/x86/kernel/cpu/mce/internal.h
@@ -348,5 +348,5 @@ static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg)
 	return 0;
 }
 
-extern void (*mc_poll_banks)(void);
+extern bool (*mc_poll_banks)(void);
 #endif /* __X86_MCE_INTERNAL_H__ */
-- 
2.9.4


^ permalink raw reply related	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-12  8:27 [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event lirongqing
@ 2026-01-12  8:56 ` Nikolay Borisov
  2026-01-12  9:36   ` 答复: [外部邮件] " Li,Rongqing
  0 siblings, 1 reply; 55+ messages in thread
From: Nikolay Borisov @ 2026-01-12  8:56 UTC (permalink / raw)
  To: lirongqing, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	Dave Hansen, x86, H . Peter Anvin, Tony Luck, Yazen Ghannam,
	Qiuxu Zhuo, Avadhut Naik, linux-kernel, linux-edac

On 12.01.26 г. 10:27 ч., lirongqing wrote:
> From: Li RongQing <lirongqing@baidu.com>
> 
> Since commit 011d82611172 ("RAS: Add a Corrected Errors Collector"),
> mce_notify_irq() in should_enable_timer() always returns false even

should_enable_timer doesn't call mce_notify_irq

> when an MCE event is logged, because bit 0 of mce_need_notify is not
> set in mce_log. This prevents the timer interval from being properly
> adjusted.
> 
> Fix this by modifying machine_check_poll() to return a boolean indicating
> whether an MCE was logged, and update mc_poll_banks() to propagate this
> return value. The timer interval logic in mce_timer_fn() now uses this
> return value directly instead of relying on mce_notify_irq().

This warrants a bit more explanation of why it's correct. mce_notify_irq()
is really a misnomer; ideally it would be named mce_notify_user(). That
function is called from 2 places:

1. Early notifier block, but there it's guaranteed to do the right thing 
as mce_need_notify is explicitly set.

2. From the timer function, where, as you have pointed out,
mce_need_notify is never set by the polling code, hence the function is
a no-op. But calling mce_log() actually processes all logged errors in
the gen pool by running the x86_mce_decoder_chain, which will do the
right thing w.r.t. mce_notify_irq() since the early notifier is called
from there.

<snip>

^ permalink raw reply	[flat|nested] 55+ messages in thread

* 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-12  8:56 ` Nikolay Borisov
@ 2026-01-12  9:36   ` Li,Rongqing
  2026-01-12  9:51     ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Li,Rongqing @ 2026-01-12  9:36 UTC (permalink / raw)
  To: Nikolay Borisov, Thomas Gleixner, Ingo Molnar, Borislav Petkov,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Tony Luck,
	Yazen Ghannam, Qiuxu Zhuo, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

> 
> On 12.01.26 г. 10:27 ч., lirongqing wrote:
> > From: Li RongQing <lirongqing@baidu.com>
> >
> > Since commit 011d82611172 ("RAS: Add a Corrected Errors Collector"),
> > mce_notify_irq() in should_enable_timer() always returns false even
> 
> should_enable_timer doesn't call mce_notify_irq
> 
> > when an MCE event is logged, because bit 0 of mce_need_notify is not
> > set in mce_log. This prevents the timer interval from being properly
> > adjusted.
> >
> > Fix this by modifying machine_check_poll() to return a boolean
> > indicating whether an MCE was logged, and update mc_poll_banks() to
> > propagate this return value. The timer interval logic in
> > mce_timer_fn() now uses this return value directly instead of relying on
> mce_notify_irq().
> 
> This warrants a bit more explanation why it's correct. Because mce_notify_irq is
> really a misnomer, it will ideally be named mce_notify_user(). That function is
> called from 2 places:
> 
> 1. Early notifier block, but there it's guaranteed to do the right thing as
> mce_need_notify is explicitly set.
> 
> 2. From the timer function, where as you have pointed out mce_need_notify is
> never set by the polling code, hence the function is a noop. But actually calling
> mce_log() processes all logged errors in the gen pool and that is processed by
> calling the x86_mce_decoder_chain which will DTRT w.r.t to mce_notify_irq
> since the early notifier will be called from there.
> 
> <snip>

Ok, I will add more explanation, and rename mce_notify_irq() as mce_notify_user();

Thanks

-Li



^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-12  9:36   ` 答复: [外部邮件] " Li,Rongqing
@ 2026-01-12  9:51     ` Borislav Petkov
  2026-01-12 10:24       ` 答复: " Li,Rongqing
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-01-12  9:51 UTC (permalink / raw)
  To: Li,Rongqing
  Cc: Nikolay Borisov, Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Tony Luck, Yazen Ghannam,
	Qiuxu Zhuo, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Mon, Jan 12, 2026 at 09:36:21AM +0000, Li,Rongqing wrote:
> Ok, I will add more explanation, and rename mce_notify_irq() as
> mce_notify_user();

No, first you should explain what you're fixing here and why.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-12  9:51     ` Borislav Petkov
@ 2026-01-12 10:24       ` Li,Rongqing
  2026-01-13  9:51         ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Li,Rongqing @ 2026-01-12 10:24 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Nikolay Borisov, Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Tony Luck, Yazen Ghannam,
	Qiuxu Zhuo, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

> On Mon, Jan 12, 2026 at 09:36:21AM +0000, Li,Rongqing wrote:
> > Ok, I will add more explanation, and rename mce_notify_irq() as
> > mce_notify_user();
> 
> No, first you should explain what you're fixing here and why.
> 
> --
> Regards/Gruss,
>     Boris.
> 

How about modifying the changelog as follows?


    x86/mce: Fix timer interval adjustment after logging a MCE event

    Since commit 011d82611172 ("RAS: Add a Corrected Errors Collector"),
    mce_timer_fn() has incorrectly determined whether to adjust the
    timer interval. The issue arises because mce_notify_irq() now always
    returns false when called from the timer path, since the polling code
    never sets bit 0 of mce_need_notify. This prevents proper adjustment of
    the timer interval based on whether MCE events were logged.

    The mce_notify_irq() is called from two contexts:
    1. Early notifier block - correctly sets mce_need_notify
    2. Timer function - never sets mce_need_notify, making it a noop
       (though logged errors are still processed through mce_log()->
        x86_mce_decoder_chain -> early notifier).

    Fix this by modifying machine_check_poll() to return a boolean indicating
    whether any MCE was logged, and updating mc_poll_banks() and related
    functions to propagate this return value. Then, mce_timer_fn() can use
    this direct return value instead of relying on mce_notify_irq() for
    timer interval decisions.

    This ensures the timer interval is correctly reduced when MCE events are
    logged and increased when no events occur.

    Fixes: 011d82611172 ("RAS: Add a Corrected Errors Collector")
> https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-12 10:24       ` 答复: " Li,Rongqing
@ 2026-01-13  9:51         ` Borislav Petkov
       [not found]           ` <39cfb093256f4da78fe0bc9e814ce5d0@baidu.com>
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-01-13  9:51 UTC (permalink / raw)
  To: Li,Rongqing
  Cc: Nikolay Borisov, Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Tony Luck, Yazen Ghannam,
	Qiuxu Zhuo, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Mon, Jan 12, 2026 at 10:24:11AM +0000, Li,Rongqing wrote:
>     Since commit 011d82611172 ("RAS: Add a Corrected Errors Collector"),
>     mce_timer_fn() has incorrectly determined whether to adjust the
>     timer interval. The issue arises because mce_notify_irq() now always
>     returns false when called from the timer path, since the polling code
>     never sets bit 0 of mce_need_notify. This prevents proper adjustment of
>     the timer interval based on whether MCE events were logged.

That's because you missed the main point of the error collector:

    "The error decoding is done with the decoding chain now and
    mce_first_notifier() gets to see the error first and the CEC decides
    whether to log it and then the rest of the chain doesn't hear about it -
    						     ^^^^^^^^^^^^^^^^^^^^^^

    basically the main reason for the CE collector - or to continue running
    the notifiers."

So lemme ask you again: what are you really fixing?!

What is the actual problem you're trying to fix?

And do not send me another revised commit message.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
       [not found]           ` <39cfb093256f4da78fe0bc9e814ce5d0@baidu.com>
@ 2026-01-13 12:48             ` Borislav Petkov
  2026-01-13 18:53               ` Luck, Tony
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-01-13 12:48 UTC (permalink / raw)
  To: Li,Rongqing
  Cc: Nikolay Borisov, Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Tony Luck, Yazen Ghannam,
	Qiuxu Zhuo, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Tue, Jan 13, 2026 at 12:37:02PM +0000, Li,Rongqing wrote:
> The comment in mce_timer_fn says to adjust the polling interval, but
> I notice the kernel log always shows an MCE log every 5 minutes. Is this
> normal?

Use git annotate to figure out which patch added this comment and in context
of what and that'll tell you why.

As to the 5 minutes, look at how the check interval gets established.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* RE: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 12:48             ` 答复: " Borislav Petkov
@ 2026-01-13 18:53               ` Luck, Tony
  2026-01-13 18:55                 ` Nikolay Borisov
                                   ` (4 more replies)
  0 siblings, 5 replies; 55+ messages in thread
From: Luck, Tony @ 2026-01-13 18:53 UTC (permalink / raw)
  To: Borislav Petkov, Li, Rongqing
  Cc: Nikolay Borisov, Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Yazen Ghannam, Zhuo, Qiuxu,
	Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

> > The comment in mce_timer_fn says to adjust the polling interval, but
> > I notice the kernel log always shows an MCE log every 5 minutes. Is this
> > normal?
>
> Use git annotate to figure out which patch added this comment and in context
> of what and that'll tell you why.
>
> As to the 5 minutes, look at how the check interval gets established.

Once upon a time the polling interval started out at 5 minutes, but the
interval was halved each time an error was found (so interval went
150s, 75s, 37s, ... down to 1s). If no error was found, then the interval
was doubled (going back up to 300s).

This is described in the comment:

        /*
         * Alert userspace if needed. If we logged an MCE, reduce the polling
         * interval, otherwise increase the polling interval.
         */

It seems that the kernel isn't doing that today. Polling at a fixed 300 seconds
even though errors are being found and logged. Interesting that the timestamps
are 327.68 seconds apart, rather than 300 and change. So there is some strange
stuff going on.

I can reproduce here on an Icelake system. Booted with mce=no_cmci to force polling
(and turned off BIOS firmware first mode).  Injecting an error every 30 seconds I also see
constant 327 seconds between logs (multiple logs show up because my injection picks memory
channel "randomly", so there can be several banks with errors when polling happens).

$ dmesg | grep 'Machine Check Event:'
[  662.632988] EDAC skx MC4: CPU 40: Machine Check Event: 0x0 Bank 13: 0x8c00014200800090
[  662.727377] EDAC skx MC6: CPU 40: Machine Check Event: 0x0 Bank 21: 0x8c0000c200800090
[  990.283484] EDAC skx MC4: CPU 121: Machine Check Event: 0x0 Bank 13: 0x8c00010200800090
[  990.378233] EDAC skx MC6: CPU 121: Machine Check Event: 0x0 Bank 21: 0x8c00014200800090
[  990.467199] EDAC skx MC0: CPU 3: Machine Check Event: 0x0 Bank 13: 0x8c00004200800090
[ 1317.939260] EDAC skx MC4: CPU 122: Machine Check Event: 0x0 Bank 13: 0x8c00010200800090
[ 1318.033721] EDAC skx MC6: CPU 122: Machine Check Event: 0x0 Bank 21: 0x8c00010200800090
[ 1318.122612] EDAC skx MC0: CPU 14: Machine Check Event: 0x0 Bank 13: 0x8c00004200800090
[ 1318.211507] EDAC skx MC2: CPU 14: Machine Check Event: 0x0 Bank 21: 0x8c00004200800090
[ 1645.590773] EDAC skx MC4: CPU 129: Machine Check Event: 0x0 Bank 13: 0x8c00010200800090
[ 1645.685153] EDAC skx MC6: CPU 129: Machine Check Event: 0x0 Bank 21: 0x8c00018200800090
[ 1645.773744] EDAC skx MC0: CPU 100: Machine Check Event: 0x0 Bank 13: 0x8c00004200800090

-Tony


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 18:53               ` Luck, Tony
@ 2026-01-13 18:55                 ` Nikolay Borisov
  2026-01-13 19:13                   ` Borislav Petkov
  2026-01-13 19:10                 ` Borislav Petkov
                                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 55+ messages in thread
From: Nikolay Borisov @ 2026-01-13 18:55 UTC (permalink / raw)
  To: Luck, Tony, Borislav Petkov, Li, Rongqing
  Cc: Thomas Gleixner, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Zhuo, Qiuxu, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org



On 13.01.26 г. 20:53 ч., Luck, Tony wrote:
>>> The comment in mce_timer_fn says to adjust the polling interval, but
>>> I notice the kernel log always shows an MCE log every 5 minutes. Is this
>>> normal?
>>
>> Use git annotate to figure out which patch added this comment and in context
>> of what and that'll tell you why.
>>
>> As to the 5 minutes, look at how the check interval gets established.
> 
> Once upon a time the polling interval started out at 5 minutes, but the
> interval was halved each time an error was found (so interval went
> 150s, 75s, 37s, ... down to 1s). If no error was found, then the interval
> was doubled (going back up to 300s).
> 
> This is described in the comment:
> 
>          /*
>           * Alert userspace if needed. If we logged an MCE, reduce the polling
>           * interval, otherwise increase the polling interval.
>           */
> 
> It seems that the kernel isn't doing that today. Polling at a fixed 300 seconds
> even though errors are being found and logged. Interesting that the timestamps
> are 327.68 seconds apart, rather than 300 and change. So there is some strange
> stuff going on.
> 
> I can reproduce here on an Icelake system. Booted with mce=no_cmci to force polling
> (and turned off BIOS firmware first mode).  Injecting an error every 30 seconds I also see
> constant 327 seconds between logs (multiple logs show up because my injection picks memory
> channel "randomly", so there can be several banks with errors when polling happens).
> 
> $ dmesg | grep 'Machine Check Event:'
> [  662.632988] EDAC skx MC4: CPU 40: Machine Check Event: 0x0 Bank 13: 0x8c00014200800090
> [  662.727377] EDAC skx MC6: CPU 40: Machine Check Event: 0x0 Bank 21: 0x8c0000c200800090
> [  990.283484] EDAC skx MC4: CPU 121: Machine Check Event: 0x0 Bank 13: 0x8c00010200800090
> [  990.378233] EDAC skx MC6: CPU 121: Machine Check Event: 0x0 Bank 21: 0x8c00014200800090
> [  990.467199] EDAC skx MC0: CPU 3: Machine Check Event: 0x0 Bank 13: 0x8c00004200800090
> [ 1317.939260] EDAC skx MC4: CPU 122: Machine Check Event: 0x0 Bank 13: 0x8c00010200800090
> [ 1318.033721] EDAC skx MC6: CPU 122: Machine Check Event: 0x0 Bank 21: 0x8c00010200800090
> [ 1318.122612] EDAC skx MC0: CPU 14: Machine Check Event: 0x0 Bank 13: 0x8c00004200800090
> [ 1318.211507] EDAC skx MC2: CPU 14: Machine Check Event: 0x0 Bank 21: 0x8c00004200800090
> [ 1645.590773] EDAC skx MC4: CPU 129: Machine Check Event: 0x0 Bank 13: 0x8c00010200800090
> [ 1645.685153] EDAC skx MC6: CPU 129: Machine Check Event: 0x0 Bank 21: 0x8c00018200800090
> [ 1645.773744] EDAC skx MC0: CPU 100: Machine Check Event: 0x0 Bank 13: 0x8c00004200800090
> 
> -Tony
> 

At this stage I think Li Rongqing's patch is ok, but in the long run (i.e.
tomorrow) I will send a patch that simply eliminates mce_notify_irq's
call in mce_timer_fn. I.e., that function should be called only from the
early notifier.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 18:53               ` Luck, Tony
  2026-01-13 18:55                 ` Nikolay Borisov
@ 2026-01-13 19:10                 ` Borislav Petkov
  2026-01-13 19:31                 ` Nikolay Borisov
                                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 55+ messages in thread
From: Borislav Petkov @ 2026-01-13 19:10 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Tue, Jan 13, 2026 at 06:53:13PM +0000, Luck, Tony wrote:
> Once upon a time the polling interval started out at 5 minutes, but the
> interval was halved each time an error was found (so interval went
> 150s, 75s, 37s, ... down to 1s). If no error was found, then the interval
> was doubled (going back up to 300s).
> 
> This is described in the comment:
> 
>         /*
>          * Alert userspace if needed. If we logged an MCE, reduce the polling
>          * interval, otherwise increase the polling interval.
>          */
> 
> It seems that the kernel isn't doing that today.

So the halving is not happening anymore because mce_notify_irq() is not
returning true.

I guess we botched that somewhere along the way...

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 18:55                 ` Nikolay Borisov
@ 2026-01-13 19:13                   ` Borislav Petkov
  2026-01-13 19:25                     ` Nikolay Borisov
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-01-13 19:13 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Luck, Tony, Li, Rongqing, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Tue, Jan 13, 2026 at 08:55:08PM +0200, Nikolay Borisov wrote:
> tomorrow) I will send a patch that simply eliminates mce_notify_irq's call
> in mce_timer_fn. I.e that function should be called only from the early
> notifier.

You still need to know whether to halve the timeout or not. And that's that
mce_need_notify thing. And that gets called in the early notifier so it should
work.

However, it would be good to be able to bisect this and find a minimal fix to
backport...

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 19:13                   ` Borislav Petkov
@ 2026-01-13 19:25                     ` Nikolay Borisov
  2026-01-13 19:33                       ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Nikolay Borisov @ 2026-01-13 19:25 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Luck, Tony, Li, Rongqing, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org



On 13.01.26 г. 21:13 ч., Borislav Petkov wrote:
> On Tue, Jan 13, 2026 at 08:55:08PM +0200, Nikolay Borisov wrote:
>> tomorrow) I will send a patch that simply eliminates mce_notify_irq's call
>> in mce_timer_fn. I.e that function should be called only from the early
>> notifier.
> 
> You still need to know whether to halve the timeout or not. And that's that
> mce_need_notify thing. And that gets called in the early notifier so it should
> work.
> 
> However, it would be good to be able to bisect this and find a minimal fix to
> backport...
> 

mce_need_notify even now works just in the early notifier because it's 
set in mce_early_notifier() and subsequently reset in mce_notify_irq() 
which is called immediately after the set.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 18:53               ` Luck, Tony
  2026-01-13 18:55                 ` Nikolay Borisov
  2026-01-13 19:10                 ` Borislav Petkov
@ 2026-01-13 19:31                 ` Nikolay Borisov
  2026-01-13 20:30                 ` Thomas Gleixner
  2026-01-13 20:56                 ` Borislav Petkov
  4 siblings, 0 replies; 55+ messages in thread
From: Nikolay Borisov @ 2026-01-13 19:31 UTC (permalink / raw)
  To: Luck, Tony, Borislav Petkov, Li, Rongqing
  Cc: Thomas Gleixner, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Zhuo, Qiuxu, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org



On 13.01.26 г. 20:53 ч., Luck, Tony wrote:
>>> The comment in mce_timer_fn says to adjust the polling interval, but
>>> I notice the kernel log always shows an MCE log every 5 minutes. Is this
>>> normal?
>>
>> Use git annotate to figure out which patch added this comment and in context
>> of what and that'll tell you why.
>>
>> As to the 5 minutes, look at how the check interval gets established.
> 
> Once upon a time the polling interval started out at 5 minutes, but the
> interval was halved each time an error was found (so interval went
> 150s, 75s, 37s, ... down to 1s). If no error was found, then the interval
> was doubled (going back up to 300s).
> 
> This is described in the comment:
> 
>          /*
>           * Alert userspace if needed. If we logged an MCE, reduce the polling
>           * interval, otherwise increase the polling interval.
>           */
> 
> It seems that the kernel isn't doing that today. Polling at a fixed 300 seconds
> even though errors are being found and logged. Interesting that the timestamps
> are 327.68 seconds apart, rather than 300 and change. So there is some strange
> stuff going on.

I think Li Rongqing's patch does exactly that, since it predicates the
halving/doubling of the interval on whether an error was found and not
on whether it was reported to user space (which is what mce_notify_irq()
does). Both concepts seem to be independent, with the former being the
core one we care about w.r.t. the decision how to adjust the interval, no?

<snip>

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 19:25                     ` Nikolay Borisov
@ 2026-01-13 19:33                       ` Borislav Petkov
  2026-01-13 19:37                         ` Nikolay Borisov
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-01-13 19:33 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Luck, Tony, Li, Rongqing, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Tue, Jan 13, 2026 at 09:25:19PM +0200, Nikolay Borisov wrote:
> mce_need_notify even now works just in the early notifier because it's set
> in mce_early_notifier() and subsequently reset in mce_notify_irq() which is
> called immediately after the set.

You still need to call something in mce_timer_fn() to know whether to halve
the interval or not.

I wonder if it would be better/cleaner if mce_early_notifier() halves the
interval and mce_timer_fn() just reads only the current interval value...
maybe that'll be less logic spread around...

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 19:33                       ` Borislav Petkov
@ 2026-01-13 19:37                         ` Nikolay Borisov
  2026-01-13 19:44                           ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Nikolay Borisov @ 2026-01-13 19:37 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Luck, Tony, Li, Rongqing, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org



On 13.01.26 г. 21:33 ч., Borislav Petkov wrote:
> On Tue, Jan 13, 2026 at 09:25:19PM +0200, Nikolay Borisov wrote:
>> mce_need_notify even now works just in the early notifier because it's set
>> in mce_early_notifier() and subsequently reset in mce_notify_irq() which is
>> called immediately after the set.
> 
> You still need to call something in mce_timer_fn() to know whether to halve
> the interval or not.

Yes, and why is Li's approach not working, i.e if mc_poll_banks() 
returns a value signalling "i found an MCE" we halve, otherwise we 
double it?


> 
> I wonder if it would be better/cleaner if mce_early_notifier() halves the
> interval and mce_timer_fn() just reads only the current interval value...
> maybe that'll be less logic spread around...
> 

That's certainly doable, but why should the interval setting be coupled 
to the initial notifier and not to mc_poll_banks?

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 19:37                         ` Nikolay Borisov
@ 2026-01-13 19:44                           ` Borislav Petkov
  2026-01-13 19:51                             ` Nikolay Borisov
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-01-13 19:44 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Luck, Tony, Li, Rongqing, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Tue, Jan 13, 2026 at 09:37:13PM +0200, Nikolay Borisov wrote:
> Yes, and why is Li's approach not working, i.e if mc_poll_banks() returns a
> value signalling "i found an MCE" we halve, otherwise we double it?

It might be "working" but I don't like that "bool logged" thing and
marshalling it back'n'forth. This should be waaay simpler.

> That's certainly doable, but why should the interval setting be coupled to
> the initial notifier and not to mc_poll_banks?

What's the difference who notifies the timer? The banks polling will call the
notifier if it finds an error to log.

But before we do any of that, we need to figure out what commit broke this so
that we fix stable.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 19:44                           ` Borislav Petkov
@ 2026-01-13 19:51                             ` Nikolay Borisov
  2026-01-13 20:33                               ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Nikolay Borisov @ 2026-01-13 19:51 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Luck, Tony, Li, Rongqing, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org



On 13.01.26 г. 21:44 ч., Borislav Petkov wrote:
> On Tue, Jan 13, 2026 at 09:37:13PM +0200, Nikolay Borisov wrote:
>> Yes, and why is Li's approach not working, i.e if mc_poll_banks() returns a
>> value signalling "i found an MCE" we halve, otherwise we double it?
> 
> It might be "working" but I don't like that "bool logged" thing and
> marshalling it back'n'forth. This should be waaay simpler.
> 
>> That's certainly doable, but why should the interval setting be coupled to
>> the initial notifier and not to mc_poll_banks?
> 
> What's the difference who notifies the timer? The banks polling will call the
> notifier if it finds an error to log.
> 
> But before we do any of that, we need to figure out what commit broke this so
> that we fix stable.
> 
> Thx.
> 


grepping around points to your 011d82611172 ("RAS: Add a Corrected 
Errors Collector"). Because looking at the commit log of my 6447828875b7 
("x86/mce/inject: Remove call to mce_notify_irq()") I mention that 
mce_need_notify got introduced in your commit and it was called 
"notify_user" before that. Before your commit in mce_log there was an 
explicit set_bit(mce_need_notify) call.

^ permalink raw reply	[flat|nested] 55+ messages in thread

* RE: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 18:53               ` Luck, Tony
                                   ` (2 preceding siblings ...)
  2026-01-13 19:31                 ` Nikolay Borisov
@ 2026-01-13 20:30                 ` Thomas Gleixner
  2026-01-13 20:56                 ` Borislav Petkov
  4 siblings, 0 replies; 55+ messages in thread
From: Thomas Gleixner @ 2026-01-13 20:30 UTC (permalink / raw)
  To: Luck, Tony, Borislav Petkov, Li, Rongqing
  Cc: Nikolay Borisov, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Zhuo, Qiuxu, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

On Tue, Jan 13 2026 at 18:53, Tony Luck wrote:
>> > The comment in mce_timer_fn says to adjust the polling interval, but
>> > I notice the kernel log always shows an MCE log every 5 minutes. Is this
>> > normal?
>>
>> Use git annotate to figure out which patch added this comment and in context
>> of what and that'll tell you why.
>>
>> As to the 5 minutes, look at how the check interval gets established.
>
> Once upon a time the polling interval started out at 5 minutes, but the
> interval was halved each time an error was found (so interval went
> 150s, 75s, 37s, ... down to 1s). If no error was found, then the interval
> was doubled (going back up to 300s).
>
> This is described in the comment:
>
>         /*
>          * Alert userspace if needed. If we logged an MCE, reduce the polling
>          * interval, otherwise increase the polling interval.
>          */
>
> It seems that the kernel isn't doing that today. Polling at a fixed 300 seconds
> even though errors are being found and logged.

How did we lose that?

> Interesting that the timestamps are 327.68 seconds apart, rather than
> 300 and change. So there is some strange stuff going on.

Nothing strange. That's the batching inaccuracy, aka. granularity of the
timer wheel. See the big fat comment on top of kernel/time/timer.c

So looking at that table, I'm sure you have HZ=250. But that granularity
does not explain why that interval magic is no longer working....

Thanks,

        tglx

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 19:51                             ` Nikolay Borisov
@ 2026-01-13 20:33                               ` Borislav Petkov
  0 siblings, 0 replies; 55+ messages in thread
From: Borislav Petkov @ 2026-01-13 20:33 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Luck, Tony, Li, Rongqing, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Tue, Jan 13, 2026 at 09:51:48PM +0200, Nikolay Borisov wrote:
> grepping around points to your 011d82611172 ("RAS: Add a Corrected Errors
> Collector"). Because looking at the commit log of my 6447828875b7
> ("x86/mce/inject: Remove call to mce_notify_irq()") I mention that
> mce_need_notify got introduced in your commit and it was called
> "notify_user" before that. Before your commit in mce_log there was an
> explicit set_bit(mce_need_notify) call .

It was removed there so that it doesn't issue the

	"Machine check events logged\n"

thing because that's the main point of the error collector - to collect errors
in the background and not upset users.

Also, there's an explicit set_bit(mce_need_notify) call in the early notifier.
So it'll get set a bit later, when the notifier chain goes.

But that same notifier does:

        set_bit(0, &mce_need_notify);
	mce_notify_irq();

so it'll clear that bit immediately.

Which means, the timer would probably never see the need_notify thing and thus
won't halve.

Which begs the question: is the timer even supposed to halve when you have the
error collector?

I guess so - we need it to log errors faster because the assumption is that
you'll be getting more errors so you need to read them out faster.

I need to think about it tomorrow on a clear head...

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 18:53               ` Luck, Tony
                                   ` (3 preceding siblings ...)
  2026-01-13 20:30                 ` Thomas Gleixner
@ 2026-01-13 20:56                 ` Borislav Petkov
  2026-01-13 21:05                   ` Luck, Tony
  4 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-01-13 20:56 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Tue, Jan 13, 2026 at 06:53:13PM +0000, Luck, Tony wrote:
> I can reproduce here on an Icelake system. Booted with mce=no_cmci to force polling
> (and turned off BIOS firmware first mode).  Injecting an error every 30 seconds I also see
> constant 327 seconds between logs (multiple logs show up because my injection picks memory
> channel "randomly", so there can be several banks with errors when polling happens).

I'm assuming you also disabled the CEC? I.e., ras=cec_disable?

> $ dmesg | grep 'Machine Check Event:'

Did you see the "Machine check events logged\n" print from mce_notify_irq() in
dmesg too?

I'll try to repro here tomorrow.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* RE: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 20:56                 ` Borislav Petkov
@ 2026-01-13 21:05                   ` Luck, Tony
  2026-01-13 21:31                     ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Luck, Tony @ 2026-01-13 21:05 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

>> $ dmesg | grep 'Machine Check Event:'
>
> Did you see the "Machine check events logged\n" print from mce_notify_irq() in
> dmesg too?

Yes.  I used the other grep pattern to see detail of which CPU/bank logged the error.
Same pattern of timestamps shows up with this grep too.

$ dmesg | grep 'Machine check events logged'
[  662.574376] mce: [Hardware Error]: Machine check events logged
[  662.669274] mce: [Hardware Error]: Machine check events logged
[  990.224918] mce: [Hardware Error]: Machine check events logged
[  990.319788] mce: [Hardware Error]: Machine check events logged
[ 1317.875993] mce: [Hardware Error]: Machine check events logged
[ 1317.975408] mce: [Hardware Error]: Machine check events logged
[ 1645.527190] mce: [Hardware Error]: Machine check events logged
[ 1645.627058] mce: [Hardware Error]: Machine check events logged
[ 1973.178560] mce: [Hardware Error]: Machine check events logged
[ 1973.277908] mce: [Hardware Error]: Machine check events logged
[ 7870.902444] mce: [Hardware Error]: Machine check events logged
[ 8198.553364] mce: [Hardware Error]: Machine check events logged

-Tony

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 21:05                   ` Luck, Tony
@ 2026-01-13 21:31                     ` Borislav Petkov
  2026-01-13 22:41                       ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-01-13 21:31 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Tue, Jan 13, 2026 at 09:05:01PM +0000, Luck, Tony wrote:
> >> $ dmesg | grep 'Machine Check Event:'
> >
> > Did you see the "Machine check events logged\n" print from mce_notify_irq() in
> > dmesg too?
> 
> Yes.  I used the other grep pattern to see detail of which CPU/bank logged the error.
> Same pattern of timestamps shows up with this grep too.

Yah, this confirms the flow:

mce_timer_fn()-> ... -> machine_check_poll -> mce_log which will queue the
work and return.

Now, back in mce_timer_fn:

        /*
         * Alert userspace if needed. If we logged an MCE, reduce the polling
         * interval, otherwise increase the polling interval.
         */
        if (mce_notify_irq())

<--- we haven't run the notifier chain yet, so mce_need_notify is not set yet
so this won't hit and we won't halve the interval. I need to verify that
empirically.

                iv = max(iv / 2, (unsigned long) HZ/100);
        else
                iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));

And now the notifier chain runs. mce_early_notifier() sets the bit, does
mce_notify_irq(), that clears the bit and then the notifier chain a little
later (skx_edac) logs the error.

So this looks like a silly timing issue...

We could set mce_need_notify in mce_log(), zap this thing:

                if (__ratelimit(&ratelimit))
                        pr_info(HW_ERR "Machine check events logged\n");

in mce_notify_irq() or at least predicate it on the CEC being enabled and then
not call mce_notify_irq() in the notifier but leave it be called in the timer
function...

Ufff, how silly and overengineered we've made it. I need to think about
a cleaner solution tomorrow...

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 21:31                     ` Borislav Petkov
@ 2026-01-13 22:41                       ` Borislav Petkov
  2026-01-14  0:30                         ` Luck, Tony
  2026-01-14  6:17                         ` [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event Nikolay Borisov
  0 siblings, 2 replies; 55+ messages in thread
From: Borislav Petkov @ 2026-01-13 22:41 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Tue, Jan 13, 2026 at 10:31:58PM +0100, Borislav Petkov wrote:
> Ufff, how silly and overengineered we've made it. I need to think about
> a cleaner solution tomorrow...

One idea I just thought of:

those two steps:

1. setting mce_need_notify, and

2.  testing and clearing it

are basically, logically, one query: do we have logged errors, or, IOW, is the
gen pool not empty?

IOW, this below.

That'll get rid of that gunk of when should we test and when set and will
JustWork(tm) for what we want it to do.

But let's see how it all really works tomorrow...

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 34440021e8cf..c378d4d450b6 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -595,7 +595,7 @@ static bool mce_notify_irq(void)
 	/* Not more than two messages every minute */
 	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
 
-	if (test_and_clear_bit(0, &mce_need_notify)) {
+	if (!mce_gen_pool_empty()) {
 		mce_work_trigger();
 
 		if (__ratelimit(&ratelimit))
@@ -618,10 +618,6 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
 	/* Emit the trace record: */
 	trace_mce_record(err);
 
-	set_bit(0, &mce_need_notify);
-
-	mce_notify_irq();
-
 	return NOTIFY_DONE;
 }
 
-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 22:41                       ` Borislav Petkov
@ 2026-01-14  0:30                         ` Luck, Tony
  2026-01-14 13:50                           ` Borislav Petkov
  2026-01-14  6:17                         ` [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event Nikolay Borisov
  1 sibling, 1 reply; 55+ messages in thread
From: Luck, Tony @ 2026-01-14  0:30 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Tue, Jan 13, 2026 at 11:41:52PM +0100, Borislav Petkov wrote:
> On Tue, Jan 13, 2026 at 10:31:58PM +0100, Borislav Petkov wrote:
> > Ufff, how silly and overengineered we've made it. I need to think about
> > a cleaner solution tomorrow...
> 
> One idea I just thought of:
> 
> those two steps:
> 
> 1. setting mce_need_notify, and
> 
> 2.  testing and clearing it
> 
> are basically logically a one query: do we have logged errors, or, IOW, is the
> gen pool not empty?
> 
> IOW, this below.
> 
> That'll get rid of that gunk of when should we test and when set and will
> JustWork(tm) for what we want it to do.
> 
> But let's see how it all really works tomorrow...
> 
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 34440021e8cf..c378d4d450b6 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -595,7 +595,7 @@ static bool mce_notify_irq(void)
>  	/* Not more than two messages every minute */
>  	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
>  
> -	if (test_and_clear_bit(0, &mce_need_notify)) {
> +	if (!mce_gen_pool_empty()) {
>  		mce_work_trigger();
>  
>  		if (__ratelimit(&ratelimit))
> @@ -618,10 +618,6 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
>  	/* Emit the trace record: */
>  	trace_mce_record(err);
>  
> -	set_bit(0, &mce_need_notify);
> -
> -	mce_notify_irq();
> -
>  	return NOTIFY_DONE;
>  }
>  

Seems to work (though you've deleted all the places where mce_need_notify
is used, so you can also delete the declaration).

@@ -90,7 +90,6 @@ struct mca_config mca_cfg __read_mostly = {
 };

 static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
-static unsigned long mce_need_notify;

 /*
  * MCA banks polled by the period polling timer for corrected events.


I see time delta between logs reducing while I'm injecting errors.

When I pause injection for several minutes, and then restart I see the
interval went back up again.

-Tony

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-13 22:41                       ` Borislav Petkov
  2026-01-14  0:30                         ` Luck, Tony
@ 2026-01-14  6:17                         ` Nikolay Borisov
  2026-01-14 13:52                           ` Borislav Petkov
  1 sibling, 1 reply; 55+ messages in thread
From: Nikolay Borisov @ 2026-01-14  6:17 UTC (permalink / raw)
  To: Borislav Petkov, Luck, Tony
  Cc: Li, Rongqing, Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Yazen Ghannam, Zhuo, Qiuxu,
	Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org



On 14.01.26 г. 0:41 ч., Borislav Petkov wrote:
> On Tue, Jan 13, 2026 at 10:31:58PM +0100, Borislav Petkov wrote:
>> Ufff, how silly and overengineered we've made it. I need to think about
>> a cleaner solution tomorrow...
> 
> One idea I just thought of:
> 
> those two steps:
> 
> 1. setting mce_need_notify, and
> 
> 2.  testing and clearing it
> 
> are basically logically a one query: do we have logged errors, or, IOW, is the
> gen pool not empty?
> 
> IOW, this below.
> 
> That'll get rid of that gunk of when should we test and when set and will
> JustWork(tm) for what we want it to do.
> 
> But let's see how it all really works tomorrow...
> 
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 34440021e8cf..c378d4d450b6 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -595,7 +595,7 @@ static bool mce_notify_irq(void)
>   	/* Not more than two messages every minute */
>   	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
>   
> -	if (test_and_clear_bit(0, &mce_need_notify)) {
> +	if (!mce_gen_pool_empty()) {
>   		mce_work_trigger();
>   
>   		if (__ratelimit(&ratelimit))
> @@ -618,10 +618,6 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
>   	/* Emit the trace record: */
>   	trace_mce_record(err);
>   
> -	set_bit(0, &mce_need_notify);
> -
> -	mce_notify_irq();
> -

So now mce_notify_irq() is not being called from the early notifier, i.e.,
the callchain mc_poll_banks -> machine_check_poll -> mce_log no longer
calls mce_notify_irq() in the early notifier chain.

>   	return NOTIFY_DONE;
>   }
>   


^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-14  0:30                         ` Luck, Tony
@ 2026-01-14 13:50                           ` Borislav Petkov
  2026-01-14 14:48                             ` Borislav Petkov
  2026-02-07 11:51                             ` Borislav Petkov
  0 siblings, 2 replies; 55+ messages in thread
From: Borislav Petkov @ 2026-01-14 13:50 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Tue, Jan 13, 2026 at 04:30:08PM -0800, Luck, Tony wrote:
> Seems to work (though you've deleted all the places where mce_need_notify
> is used, so you can also delete the declaration.

Right.

> I see time delta between logs reducing while I'm injecting errors.
> 
> When I pause injection for several minutes, and then restart I see the
> interval went back up again.

Thanks Tony, I'll play with this too and ponder over what would be the proper
fix to take to stable too.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-14  6:17                         ` [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event Nikolay Borisov
@ 2026-01-14 13:52                           ` Borislav Petkov
  0 siblings, 0 replies; 55+ messages in thread
From: Borislav Petkov @ 2026-01-14 13:52 UTC (permalink / raw)
  To: Nikolay Borisov
  Cc: Luck, Tony, Li, Rongqing, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Wed, Jan 14, 2026 at 08:17:50AM +0200, Nikolay Borisov wrote:
> so now mce_notify_irq() is not being called from the early notifier, i.e the
> callchain mc_poll_banks -> machine_check_poll -> mce_log no longer call
> mce_notify_irq() in the early notifier chain.

... which was the result of us moving this back'n'forth and refactoring it as
part of normal development. The thing is, that notification mechanism is
perhaps not needed there anymore, in favor of simply querying the gen pool.

But we'll see - needs more staring and injection.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-14 13:50                           ` Borislav Petkov
@ 2026-01-14 14:48                             ` Borislav Petkov
  2026-02-02 15:18                               ` Borislav Petkov
  2026-02-07 11:51                             ` Borislav Petkov
  1 sibling, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-01-14 14:48 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

Ok,

I can repro in a guest here too:

[  316.791005] mce: [Hardware Error]: Machine check events logged
[  316.793836] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[  316.797398] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[  316.800013] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1768404431 SOCKET 0 APIC 0 microcode 800820d
[  628.086857] mce: [Hardware Error]: Machine check events logged
[  628.089625] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[  628.093156] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[  628.095739] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1768404743 SOCKET 0 APIC 0 microcode 800820d
[  939.382727] mce: [Hardware Error]: Machine check events logged
[  939.385593] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[  939.389194] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[  939.391690] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1768405054 SOCKET 0 APIC 0 microcode 800820d
[ 1250.678672] mce: [Hardware Error]: Machine check events logged
[ 1250.681584] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[ 1250.685221] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[ 1250.687786] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1768405365 SOCKET 0 APIC 0 microcode 800820d

Timestamps are consistent with the observation.

Now on to find what causes this. Even if we can't find the proper commit,
I guess testing 6.18 and 6.12 - the LTS kernels - should be good enough as to
backport a fix there.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-14 14:48                             ` Borislav Petkov
@ 2026-02-02 15:18                               ` Borislav Petkov
  2026-02-02 23:49                                 ` 答复: [外部邮件] " Li,Rongqing
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-02-02 15:18 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Wed, Jan 14, 2026 at 03:48:13PM +0100, Borislav Petkov wrote:
> Now on to find what causes this. Even if we can't find the proper commit,
> I guess testing 6.18 and 6.12 - the LTS kernels - should be good enough as to
> backport a fix there.

Ok, finally back to staring at this.

Looks like adding this:

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 34440021e8cf..b94efe5950c4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -154,6 +154,8 @@ void mce_log(struct mce_hw_err *err)
 {
        if (mce_gen_pool_add(err))
                irq_work_queue(&mce_irq_work);
+
+       set_bit(0, &mce_need_notify);
 }
 EXPORT_SYMBOL_GPL(mce_log);
 
makes the interval halve again, see below for the timestamps.

I guess I'll do a proper patch from the hunk here:

https://lore.kernel.org/r/20260113224152.GVaWbKMMzManQ5WwlT@fat_crate.local

along with 6.12 and 6.18 backports and see whether that's good enough as
a stable fix too.

Thx.

[  316.795248] mce: [Hardware Error]: Machine check events logged
[  316.795262] mce: [Hardware Error]: Machine check events logged
[  316.798331] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[  316.800104] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[  316.801442] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770040950 SOCKET 0 APIC 0 microcode 800820d
[  628.091492] mce: [Hardware Error]: Machine check events logged
[  628.091515] mce: [Hardware Error]: Machine check events logged
[  628.097216] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[  628.101393] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[  628.103992] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770041262 SOCKET 0 APIC 0 microcode 800820d

<--- it starts decreasing the interval here.

[  825.581354] hrtimer: interrupt took 18820 ns
[  939.387367] mce: [Hardware Error]: Machine check events logged
[  939.390185] mce: [Hardware Error]: Machine check events logged
[  939.392936] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[  939.396465] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[  939.399042] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770041573 SOCKET 0 APIC 0 microcode 800820d
[ 1103.227402] mce: [Hardware Error]: Machine check events logged
[ 1103.230267] mce: [Hardware Error]: Machine check events logged
[ 1103.233018] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[ 1103.236565] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[ 1103.239146] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770041737 SOCKET 0 APIC 0 microcode 800820d
[ 1179.003479] mce: [Hardware Error]: Machine check events logged
[ 1179.006452] mce: [Hardware Error]: Machine check events logged
[ 1179.009144] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[ 1179.012757] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[ 1179.015338] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770041813 SOCKET 0 APIC 0 microcode 800820d
[ 1217.915386] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[ 1217.919088] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[ 1217.921662] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770041852 SOCKET 0 APIC 0 microcode 800820d
[ 1238.395440] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[ 1238.399041] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[ 1238.401619] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770041872 SOCKET 0 APIC 0 microcode 800820d
[ 1269.115368] mce_notify_irq: 4 callbacks suppressed
[ 1269.117829] mce: [Hardware Error]: Machine check events logged
[ 1269.120586] mce: [Hardware Error]: Machine check events logged
[ 1269.123412] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[ 1269.126950] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[ 1269.129511] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770041903 SOCKET 0 APIC 0 microcode 800820d

and then it started enlarging it again when I changed the injection interval
to 300s.

[ 1578.363408] mce: [Hardware Error]: Machine check events logged
[ 1578.366346] mce: [Hardware Error]: Machine check events logged
[ 1578.369174] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[ 1578.372742] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[ 1578.375226] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770042212 SOCKET 0 APIC 0 microcode 800820d
[ 2119.035460] mce: [Hardware Error]: Machine check events logged
[ 2119.038432] mce: [Hardware Error]: Machine check events logged
[ 2119.041236] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[ 2119.044846] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[ 2119.047340] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770042753 SOCKET 0 APIC 0 microcode 800820d
[ 2282.875491] mce: [Hardware Error]: Machine check events logged
[ 2282.878409] mce: [Hardware Error]: Machine check events logged
[ 2282.881277] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[ 2282.884978] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[ 2282.887482] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770042917 SOCKET 0 APIC 0 microcode 800820d
[ 2512.251516] mce: [Hardware Error]: Machine check events logged
[ 2512.254371] mce: [Hardware Error]: Machine check events logged
[ 2512.257261] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
[ 2512.260841] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b 
[ 2512.263406] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770043146 SOCKET 0 APIC 0 microcode 800820d

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-02-02 15:18                               ` Borislav Petkov
@ 2026-02-02 23:49                                 ` Li,Rongqing
  2026-02-06 22:03                                   ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Li,Rongqing @ 2026-02-02 23:49 UTC (permalink / raw)
  To: Borislav Petkov, Luck, Tony
  Cc: Nikolay Borisov, Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Yazen Ghannam, Zhuo, Qiuxu,
	Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

> On Wed, Jan 14, 2026 at 03:48:13PM +0100, Borislav Petkov wrote:
> > Now on to find what causes this. Even if we can't find the proper
> > commit, I guess testing 6.18 and 6.12 - the LTS kernels - should be
> > good enough as to backport a fix there.
> 
> Ok, finally back to staring at this.
> 
> Looks like adding this:
> 
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 34440021e8cf..b94efe5950c4 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -154,6 +154,8 @@ void mce_log(struct mce_hw_err *err)  {
>         if (mce_gen_pool_add(err))
>                 irq_work_queue(&mce_irq_work);
> +
> +       set_bit(0, &mce_need_notify);
>  }
>  EXPORT_SYMBOL_GPL(mce_log);
> 
> makes the interval halve again, see below for the timestamps.
> 
> I guess I'll do a proper patch from the hunk here:
> 
> https://lore.kernel.org/r/20260113224152.GVaWbKMMzManQ5WwlT@fat_cr
> ate.local
> 

Is it possible that CPU0 sets mce_need_notify, but CPU1 concurrently calls mce_notify_irq() in mce_timer_fn(), and then CPU1 sets its own timer to 1/2 instead of CPU0's?

[Li,Rongqing] 



> along with 6.12 and 6.18 backports and see whether that's a good enough as a
> stable fix too.
> 
> Thx.
> 
> [  316.795248] mce: [Hardware Error]: Machine check events logged
> [  316.795262] mce: [Hardware Error]: Machine check events logged
> [  316.798331] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4:
> 9c2041000000011b [  316.800104] mce: [Hardware Error]: TSC 0 ADDR
> 6d3d483b [  316.801442] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME
> 1770040950 SOCKET 0 APIC 0 microcode 800820d [  628.091492] mce:
> [Hardware Error]: Machine check events logged [  628.091515] mce:
> [Hardware Error]: Machine check events logged [  628.097216] mce:
> [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
> [  628.101393] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b [  628.103992]
> mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770041262 SOCKET 0
> APIC 0 microcode 800820d
> 
> <--- it starts decreasing the interval here.
> 
> [  825.581354] hrtimer: interrupt took 18820 ns [  939.387367] mce:
> [Hardware Error]: Machine check events logged [  939.390185] mce:
> [Hardware Error]: Machine check events logged [  939.392936] mce:
> [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
> [  939.396465] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b [  939.399042]
> mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770041573 SOCKET 0
> APIC 0 microcode 800820d [ 1103.227402] mce: [Hardware Error]: Machine
> check events logged [ 1103.230267] mce: [Hardware Error]: Machine check
> events logged [ 1103.233018] mce: [Hardware Error]: CPU 0: Machine Check: 0
> Bank 4: 9c2041000000011b [ 1103.236565] mce: [Hardware Error]: TSC 0
> ADDR 6d3d483b [ 1103.239146] mce: [Hardware Error]: PROCESSOR 2:800f82
> TIME 1770041737 SOCKET 0 APIC 0 microcode 800820d [ 1179.003479] mce:
> [Hardware Error]: Machine check events logged [ 1179.006452] mce:
> [Hardware Error]: Machine check events logged [ 1179.009144] mce:
> [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
> [ 1179.012757] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b [ 1179.015338]
> mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770041813 SOCKET 0
> APIC 0 microcode 800820d [ 1217.915386] mce: [Hardware Error]: CPU 0:
> Machine Check: 0 Bank 4: 9c2041000000011b [ 1217.919088] mce:
> [Hardware Error]: TSC 0 ADDR 6d3d483b [ 1217.921662] mce: [Hardware
> Error]: PROCESSOR 2:800f82 TIME 1770041852 SOCKET 0 APIC 0 microcode
> 800820d [ 1238.395440] mce: [Hardware Error]: CPU 0: Machine Check: 0
> Bank 4: 9c2041000000011b [ 1238.399041] mce: [Hardware Error]: TSC 0
> ADDR 6d3d483b [ 1238.401619] mce: [Hardware Error]: PROCESSOR 2:800f82
> TIME 1770041872 SOCKET 0 APIC 0 microcode 800820d [ 1269.115368]
> mce_notify_irq: 4 callbacks suppressed [ 1269.117829] mce: [Hardware Error]:
> Machine check events logged [ 1269.120586] mce: [Hardware Error]: Machine
> check events logged [ 1269.123412] mce: [Hardware Error]: CPU 0: Machine
> Check: 0 Bank 4: 9c2041000000011b [ 1269.126950] mce: [Hardware Error]:
> TSC 0 ADDR 6d3d483b [ 1269.129511] mce: [Hardware Error]: PROCESSOR
> 2:800f82 TIME 1770041903 SOCKET 0 APIC 0 microcode 800820d
> 
> and then it started enlarging it again when I changed the injection interval to
> 300s.
> 
> [ 1578.363408] mce: [Hardware Error]: Machine check events logged
> [ 1578.366346] mce: [Hardware Error]: Machine check events logged
> [ 1578.369174] mce: [Hardware Error]: CPU 0: Machine Check: 0 Bank 4:
> 9c2041000000011b [ 1578.372742] mce: [Hardware Error]: TSC 0 ADDR
> 6d3d483b [ 1578.375226] mce: [Hardware Error]: PROCESSOR 2:800f82 TIME
> 1770042212 SOCKET 0 APIC 0 microcode 800820d [ 2119.035460] mce:
> [Hardware Error]: Machine check events logged [ 2119.038432] mce:
> [Hardware Error]: Machine check events logged [ 2119.041236] mce:
> [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
> [ 2119.044846] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b [ 2119.047340]
> mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770042753 SOCKET 0
> APIC 0 microcode 800820d [ 2282.875491] mce: [Hardware Error]: Machine
> check events logged [ 2282.878409] mce: [Hardware Error]: Machine check
> events logged [ 2282.881277] mce: [Hardware Error]: CPU 0: Machine Check: 0
> Bank 4: 9c2041000000011b [ 2282.884978] mce: [Hardware Error]: TSC 0
> ADDR 6d3d483b [ 2282.887482] mce: [Hardware Error]: PROCESSOR 2:800f82
> TIME 1770042917 SOCKET 0 APIC 0 microcode 800820d [ 2512.251516] mce:
> [Hardware Error]: Machine check events logged [ 2512.254371] mce:
> [Hardware Error]: Machine check events logged [ 2512.257261] mce:
> [Hardware Error]: CPU 0: Machine Check: 0 Bank 4: 9c2041000000011b
> [ 2512.260841] mce: [Hardware Error]: TSC 0 ADDR 6d3d483b [ 2512.263406]
> mce: [Hardware Error]: PROCESSOR 2:800f82 TIME 1770043146 SOCKET 0
> APIC 0 microcode 800820d
> 
> --
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-02-02 23:49                                 ` 答复: [外部邮件] " Li,Rongqing
@ 2026-02-06 22:03                                   ` Borislav Petkov
  0 siblings, 0 replies; 55+ messages in thread
From: Borislav Petkov @ 2026-02-06 22:03 UTC (permalink / raw)
  To: Li,Rongqing
  Cc: Luck, Tony, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Mon, Feb 02, 2026 at 11:49:40PM +0000, Li,Rongqing wrote:
> Is it possible that CPU0 sets mce_need_notify, but CPU1 concurrently calls
> mce_notify_irq() in mce_timer_fn(), and then CPU1 sets its own timer to 1/2
> instead of CPU0's?

I don't see why not.

But this is no different than what it was before AFAICT.

And if the timer expiry interval on the wrong CPU gets halved, then it'll see
faster that there's no error for it to log and it'll enlarge the interval
again.

So I don't see a problem with that particular scenario.

Or?

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-01-14 13:50                           ` Borislav Petkov
  2026-01-14 14:48                             ` Borislav Petkov
@ 2026-02-07 11:51                             ` Borislav Petkov
  2026-02-09 17:37                               ` Luck, Tony
  1 sibling, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-02-07 11:51 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Wed, Jan 14, 2026 at 02:50:34PM +0100, Borislav Petkov wrote:
> On Tue, Jan 13, 2026 at 04:30:08PM -0800, Luck, Tony wrote:
> > Seems to work (though you've deleted all the places where mce_need_notify
> > is used, so you can also delete the declaration).
> 
> Right.
> 
> > I see time delta between logs reducing while I'm injecting errors.
> > 
> > When I pause injection for several minutes, and then restart I see the
> > interval went back up again.
> 
> Thanks Tony, I'll play with this too and ponder over what would be the proper
> fix which to take to stable too.

Hmm, so looking at this more while it is all peaceful and I can actually hear
the thoughts in my head... :-)

The whole dance here on the MCE logging path:

mce_log -> ... mce_irq_work -> ... mce_work -> mce_gen_pool_process

can happen in between two mce_timer_fn() function firings - just think of
the default timer running once every 5 mins.

So in-between those runs with 5 min timeout, errors can get logged and when
mce_notify_irq() runs, it won't see either that the genpool is not empty
- it will be empty - and mce_need_notify will be 0 too because we would've
set and cleared it. 

So basically, the timer fires, we log errors without it noticing anything, and
it won't halve.

The only way it would halve is if it manages to notice an error being
in-flight to being logged and it fires right then and there. Then its interval
would get halved.

And this sounds kinda weird and not what we want perhaps.

So fixing that would mean, we'd have to write down the fact that in-between
two timer invocations, we have logged errors. Maybe a per-CPU counter
somewhere which says "this CPU logged so many errors after the timer ran the
last time".

The timer would fire, check that counter for != 0, and if so, decrease
interval and clear it.

And it doesn't even have to be a counter - it suffices to be a single bit
which gets set.

A scheme like that would solve this accurately I'd say.

But the real question actually is, do we really care?

I mean, this thing went unnoticed for so long and frankly, people should run
the CEC anyway which has a better MCE-has-been-logged stifling capability so
that I wanna say, let's do the simplest thing and be done with it.

Or?

Do we care about some real use case here...?

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-02-07 11:51                             ` Borislav Petkov
@ 2026-02-09 17:37                               ` Luck, Tony
  2026-02-10 15:01                                 ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Luck, Tony @ 2026-02-09 17:37 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Sat, Feb 07, 2026 at 12:51:42PM +0100, Borislav Petkov wrote:
> On Wed, Jan 14, 2026 at 02:50:34PM +0100, Borislav Petkov wrote:
> > On Tue, Jan 13, 2026 at 04:30:08PM -0800, Luck, Tony wrote:
> > > Seems to work (though you've deleted all the places where mce_need_notify
> > > is used, so you can also delete the declaration).
> > 
> > Right.
> > 
> > > I see time delta between logs reducing while I'm injecting errors.
> > > 
> > > When I pause injection for several minutes, and then restart I see the
> > > interval went back up again.
> > 
> > Thanks Tony, I'll play with this too and ponder over what would be the proper
> > fix which to take to stable too.
> 
> Hmm, so looking at this more while it is all peaceful and I can actually hear
> the thoughts in my head... :-)
> 
> The whole dance here on the MCE logging path:
> 
> mce_log -> ... mce_irq_work -> ... mce_work -> mce_gen_pool_process
> 
> can happen in between two mce_timer_fn() function firings - just think of
> the default timer running once every 5 mins.
> 
> So in-between those runs with 5 min timeout, errors can get logged and when
> mce_notify_irq() runs, it won't see either that the genpool is not empty
> - it will be empty - and mce_need_notify will be 0 too because we would've
> set and cleared it. 

The algorithm to halve the interval when errors are found, and double it
when they are not found was originally for a "poll-only" configuration.
So there wasn't an option for an error to be logged between timer
invocations. This all dates back to before #MC was recoverable.

If the system is now running in some mixed mode of polling and
interrupts, then it is unclear what should be done in various
new cases.

> 
> So basically, the timer fires, we log errors without it noticing anything, and
> it won't halve.
> 
> The only way it would halve is if it manages to notice an error being
> in-flight to being logged and it fires right then and there. Then its interval
> would get halved.
> 
> And this sounds kinda weird and not what we want perhaps.
> 
> So fixing that would mean, we'd have to write down the fact that in-between
> two timer invocations, we have logged errors. Maybe a per-CPU counter
> somewhere which says "this CPU logged so many errors after the timer ran the
> last time".
> 
> The timer would fire, check that counter for != 0, and if so, decrease
> interval and clear it.
> 
> And it doesn't even have to be a counter - it suffices to be a single bit
> which gets set.
> 
> A scheme like that would solve this accurately I'd say.
> 
> But the real question actually is, do we really care?

I don't think we care. If we miss out halving the interval because an
error was logged between timer based polling, nothing really bad will
happen. The interval might get sorted out on the next interval.

> I mean, this thing went unnoticed for so long and frankly, people should run
> the CEC anyway which has a better MCE-has-been-logged stifling capability so
> that I wanna say, let's do the simplest thing and be done with it.
> 
> Or?
> 
> Do we care about some real use case here...?
> 

Unless someone has a real world case where something is going badly
wrong, then I don't think any changes are needed to cover this race.

-Tony

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-02-09 17:37                               ` Luck, Tony
@ 2026-02-10 15:01                                 ` Borislav Petkov
  2026-03-06  7:37                                   ` 答复: [外部邮件] " Li,Rongqing(ACG CCN)
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-02-10 15:01 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Li, Rongqing, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Mon, Feb 09, 2026 at 09:37:35AM -0800, Luck, Tony wrote:
> If the system is now running in some mixed mode of polling and
> interrupts, then it is unclear what should be done in various
> new cases.

Well, what about CMCI? AMD has similar interrupt types. Those handlers end up
in the same path of machine_check_poll() and the above scenario can happen,
AFAICT.

> I don't think we care. If we miss out halving the interval because an
> error was logged between timer based polling, nothing really bad will
> happen. The interval might get sorted out on the next interval.

Right, that's what I'm thinking too.

> Unless someone has a real world case where something is going badly
> wrong, then I don't think any changes are needed to cover this race.

Ok, let's try the simple thing first.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-02-10 15:01                                 ` Borislav Petkov
@ 2026-03-06  7:37                                   ` Li,Rongqing(ACG CCN)
  2026-03-06 14:00                                     ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Li,Rongqing(ACG CCN) @ 2026-03-06  7:37 UTC (permalink / raw)
  To: Borislav Petkov, Luck, Tony
  Cc: Nikolay Borisov, Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Yazen Ghannam, Zhuo, Qiuxu,
	Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

> Well, what about CMCI? AMD has similar interrupt types. Those handlers end up
> in the same path of machine_check_poll() and the above scenario can happen,
> AFAICT.
> 
> > I don't think we care. If we miss out halving the interval because an
> > error was logged between timer based polling, nothing really bad will
> > happen. The interval might get sorted out on the next interval.
> 
> Right, that's what I'm thinking too.
> 
> > Unless someone has a real world case where something is going badly
> > wrong, then I don't think any changes are needed to cover this race.
> 
> Ok, let's try the simple thing first.
> 
> Thx.

Is there any progress on this?

Thx

[Li,Rongqing] 


> 
> --
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-03-06  7:37                                   ` 答复: [外部邮件] " Li,Rongqing(ACG CCN)
@ 2026-03-06 14:00                                     ` Borislav Petkov
  2026-03-06 14:38                                       ` 答复: " Li,Rongqing(ACG CCN)
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-03-06 14:00 UTC (permalink / raw)
  To: Li,Rongqing(ACG CCN), Luck, Tony
  Cc: Nikolay Borisov, Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Yazen Ghannam, Zhuo, Qiuxu,
	Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On March 6, 2026 7:37:01 AM UTC, "Li,Rongqing(ACG CCN)" <lirongqing@baidu.com> wrote:
>Is there any progress on this?

Not yet.

May I ask what is your usecase for this? 

-- 
Small device. Typos and formatting crap

^ permalink raw reply	[flat|nested] 55+ messages in thread

* 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-03-06 14:00                                     ` Borislav Petkov
@ 2026-03-06 14:38                                       ` Li,Rongqing(ACG CCN)
  2026-03-06 15:29                                         ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Li,Rongqing(ACG CCN) @ 2026-03-06 14:38 UTC (permalink / raw)
  To: Borislav Petkov, Luck, Tony
  Cc: Nikolay Borisov, Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Yazen Ghannam, Zhuo, Qiuxu,
	Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org



> -----邮件原件-----
> 发件人: Borislav Petkov <bp@alien8.de>
> 发送时间: 2026年3月6日 22:00
> 收件人: Li,Rongqing(ACG CCN) <lirongqing@baidu.com>; Luck, Tony
> <tony.luck@intel.com>
> 抄送: Nikolay Borisov <nik.borisov@suse.com>; Thomas Gleixner
> <tglx@kernel.org>; Ingo Molnar <mingo@redhat.com>; Dave Hansen
> <dave.hansen@linux.intel.com>; x86@kernel.org; H . Peter Anvin
> <hpa@zytor.com>; Yazen Ghannam <yazen.ghannam@amd.com>; Zhuo, Qiuxu
> <qiuxu.zhuo@intel.com>; Avadhut Naik <avadhut.naik@amd.com>;
> linux-kernel@vger.kernel.org; linux-edac@vger.kernel.org
> 主题: Re: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval
> adjustment after logging a MCE event
> 
> On March 6, 2026 7:37:01 AM UTC, "Li,Rongqing(ACG CCN)"
> <lirongqing@baidu.com> wrote:
> >Is there any progress on this?
> 
> Not yet.
> 
> May I ask what is your usecase for this? 


We anticipate potential UE issues by analyzing the volume and frequency of collected CE reports, enabling us to perform proactive task offloading and machine maintenance. However, inaccuracies in the collected data are currently undermining this approach, making it difficult to reliably predict UE incidents.

[Li,Rongqing] 
> 
> --
> Small device. Typos and formatting crap

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-03-06 14:38                                       ` 答复: " Li,Rongqing(ACG CCN)
@ 2026-03-06 15:29                                         ` Borislav Petkov
  2026-03-07  1:18                                           ` 答复: " Li,Rongqing(ACG CCN)
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-03-06 15:29 UTC (permalink / raw)
  To: Li,Rongqing(ACG CCN)
  Cc: Luck, Tony, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Fri, Mar 06, 2026 at 02:38:29PM +0000, Li,Rongqing(ACG CCN) wrote:
> We anticipate potential UE issues by analyzing the volume and frequency of
> collected CE reports, enabling us to perform proactive task offloading and
> machine maintenance. However, inaccuracies in the collected data are
> currently undermining this approach, making it difficult to reliably predict
> UE incidents.

This looks like a canned AI reply to me.

I think you wanna say, you want to get *every* single error logged. Yes?

So you want to be able to decrease the polling interval if necessary?

Do you also disable the RAS CEC?

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-03-06 15:29                                         ` Borislav Petkov
@ 2026-03-07  1:18                                           ` Li,Rongqing(ACG CCN)
  2026-03-16 13:44                                             ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Li,Rongqing(ACG CCN) @ 2026-03-07  1:18 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Luck, Tony, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org



> -----邮件原件-----
> 发件人: Borislav Petkov <bp@alien8.de>
> 发送时间: 2026年3月6日 23:29
> 收件人: Li,Rongqing(ACG CCN) <lirongqing@baidu.com>
> 抄送: Luck, Tony <tony.luck@intel.com>; Nikolay Borisov
> <nik.borisov@suse.com>; Thomas Gleixner <tglx@kernel.org>; Ingo Molnar
> <mingo@redhat.com>; Dave Hansen <dave.hansen@linux.intel.com>;
> x86@kernel.org; H . Peter Anvin <hpa@zytor.com>; Yazen Ghannam
> <yazen.ghannam@amd.com>; Zhuo, Qiuxu <qiuxu.zhuo@intel.com>; Avadhut
> Naik <avadhut.naik@amd.com>; linux-kernel@vger.kernel.org;
> linux-edac@vger.kernel.org
> 主题: Re: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval
> adjustment after logging a MCE event
> 
> On Fri, Mar 06, 2026 at 02:38:29PM +0000, Li,Rongqing(ACG CCN) wrote:
> > We anticipate potential UE issues by analyzing the volume and
> > frequency of collected CE reports, enabling us to perform proactive
> > task offloading and machine maintenance. However, inaccuracies in the
> > collected data are currently undermining this approach, making it
> > difficult to reliably predict UE incidents.
> 
> This looks like a canned AI reply to me.
> 
> I think you wanna say, you want to get *every* single error logged. Yes?
> 
> So you want to be able to decrease the polling interval if necessary?
> 
> Do you also disable the RAS CEC?
> 

CEC may not work in some cases. For example, when QEMU uses vDPA devices, all of QEMU's memory is pinned and cannot be offlined. hugetlbfs has only recently gained support for offline operations; offlining hugetlbfs can cause issues. The kernel provides an interface to disable offline, as referenced in the patch "mm/memory-failure: userspace controls soft-offlining pages." 
And when evaluating Correctable Errors (CE) in userspace, more parameters can be considered, such as memory manufacturer, batch, and frequency, to improve prediction accuracy.


[Li,Rongqing] 


> Thx.
> 
> --
> Regards/Gruss,
>     Boris.
> 
> https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: 答复: 答复: 答复: [外部邮件] Re: [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event
  2026-03-07  1:18                                           ` 答复: " Li,Rongqing(ACG CCN)
@ 2026-03-16 13:44                                             ` Borislav Petkov
  2026-04-06 22:49                                               ` [PATCH] x86/mce: Restore MCA polling interval halving Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-03-16 13:44 UTC (permalink / raw)
  To: Li,Rongqing(ACG CCN)
  Cc: Luck, Tony, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

On Sat, Mar 07, 2026 at 01:18:50AM +0000, Li,Rongqing(ACG CCN) wrote:
> CEC may not work in some cases.

I actually am saying that if you wanna look at every CE, you *want* to disable
the CEC because it consumes them. Look at

drivers/ras/cec.c

for more info.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* [PATCH] x86/mce: Restore MCA polling interval halving
  2026-03-16 13:44                                             ` Borislav Petkov
@ 2026-04-06 22:49                                               ` Borislav Petkov
  2026-04-07 12:51                                                 ` Nikolay Borisov
  2026-04-07 15:04                                                 ` Zhuo, Qiuxu
  0 siblings, 2 replies; 55+ messages in thread
From: Borislav Petkov @ 2026-04-06 22:49 UTC (permalink / raw)
  To: Li,Rongqing(ACG CCN)
  Cc: Luck, Tony, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Zhuo, Qiuxu, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

Ok,

finally. :-\

Pls run it to make sure it DTRT for you too.

Thx.

---
From: "Borislav Petkov (AMD)" <bp@alien8.de>
Date: Mon, 16 Mar 2026 16:12:00 +0100
Subject: [PATCH] x86/mce: Restore MCA polling interval halving

RongQing reported that the MCA polling interval doesn't halve when an
error gets logged. It was traced down to the commit in Fixes: because:

mce_timer_fn()
|-> mce_poll_banks()
|-> machine_check_poll()
|-> mce_log()

which will queue the work and return.

Now, back in mce_timer_fn():

        /*
         * Alert userspace if needed. If we logged an MCE, reduce the polling
         * interval, otherwise increase the polling interval.
         */
        if (mce_notify_irq())

<--- here we haven't run the notifier chain yet so mce_need_notify is
not set yet so this won't hit and we won't halve the interval iv.

Now the notifier chain runs. mce_early_notifier() sets the bit, does
mce_notify_irq(), that clears the bit and then the notifier chain
a little later logs the error.

So this is a silly timing issue.

But, that's all unnecessary.

All that needs to happen here is that the "should we notify of a logged
MCE" question mce_notify_irq() asks should simply be a question to the
mce gen pool: "Are you empty?"

And that then turns into a simple yes or no answer and it all
JustWorks(tm).

So do that.

Fixes: 011d82611172 ("RAS: Add a Corrected Errors Collector")
Reported-by: Li RongQing <lirongqing@baidu.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20260112082747.2842-1-lirongqing@baidu.com
---
 arch/x86/kernel/cpu/mce/core.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 8dd424ac5de8..d18db7d8d237 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -90,7 +90,6 @@ struct mca_config mca_cfg __read_mostly = {
 };
 
 static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
-static unsigned long mce_need_notify;
 
 /*
  * MCA banks polled by the period polling timer for corrected events.
@@ -595,7 +594,7 @@ static bool mce_notify_irq(void)
 	/* Not more than two messages every minute */
 	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
 
-	if (test_and_clear_bit(0, &mce_need_notify)) {
+	if (!mce_gen_pool_empty()) {
 		mce_work_trigger();
 
 		if (__ratelimit(&ratelimit))
@@ -618,10 +617,6 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
 	/* Emit the trace record: */
 	trace_mce_record(err);
 
-	set_bit(0, &mce_need_notify);
-
-	mce_notify_irq();
-
 	return NOTIFY_DONE;
 }
 
-- 
2.51.0


-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-06 22:49                                               ` [PATCH] x86/mce: Restore MCA polling interval halving Borislav Petkov
@ 2026-04-07 12:51                                                 ` Nikolay Borisov
  2026-04-07 15:04                                                 ` Zhuo, Qiuxu
  1 sibling, 0 replies; 55+ messages in thread
From: Nikolay Borisov @ 2026-04-07 12:51 UTC (permalink / raw)
  To: Borislav Petkov, Li,Rongqing(ACG CCN)
  Cc: Luck, Tony, Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Yazen Ghannam, Zhuo, Qiuxu,
	Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org



On 7.04.26 г. 1:49 ч., Borislav Petkov wrote:
> Ok,
> 
> finally. :-\
> 
> Pls run it to make sure it DTRT for you too.
> 
> Thx.
> 
> ---
> From: "Borislav Petkov (AMD)" <bp@alien8.de>
> Date: Mon, 16 Mar 2026 16:12:00 +0100
> Subject: [PATCH] x86/mce: Restore MCA polling interval halving
> 
> RongQing reported that the MCA polling interval doesn't halve when an
> error gets logged. It was traced down to the commit in Fixes: because:
> 
> mce_timer_fn()
> |-> mce_poll_banks()
> |-> machine_check_poll()
> |-> mce_log()
> 
> which will queue the work and return.
> 
> Now, back in mce_timer_fn():
> 
>          /*
>           * Alert userspace if needed. If we logged an MCE, reduce the polling
>           * interval, otherwise increase the polling interval.
>           */
>          if (mce_notify_irq())
> 
> <--- here we haven't run the notifier chain yet so mce_need_notify is
> not set yet so this won't hit and we won't halve the interval iv.
> 
> Now the notifier chain runs. mce_early_notifier() sets the bit, does
> mce_notify_irq(), that clears the bit and then the notifier chain
> a little later logs the error.
> 
> So this is a silly timing issue.
> 
> But, that's all unnecessary.
> 
> All that needs to happen here is that the "should we notify of a logged
> MCE" question mce_notify_irq() asks should simply be a question to the
> mce gen pool: "Are you empty?"
> 
> And that then turns into a simple yes or no answer and it all
> JustWorks(tm).
> 
> So do that.
> 
> Fixes: 011d82611172 ("RAS: Add a Corrected Errors Collector")
> Reported-by: Li RongQing <lirongqing@baidu.com>
> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
> Link: https://lore.kernel.org/r/20260112082747.2842-1-lirongqing@baidu.com


Much cleaner and simpler,

Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>

^ permalink raw reply	[flat|nested] 55+ messages in thread

* RE: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-06 22:49                                               ` [PATCH] x86/mce: Restore MCA polling interval halving Borislav Petkov
  2026-04-07 12:51                                                 ` Nikolay Borisov
@ 2026-04-07 15:04                                                 ` Zhuo, Qiuxu
  2026-04-14 21:18                                                   ` Borislav Petkov
  1 sibling, 1 reply; 55+ messages in thread
From: Zhuo, Qiuxu @ 2026-04-07 15:04 UTC (permalink / raw)
  To: Borislav Petkov, Li,Rongqing(ACG CCN)
  Cc: Luck, Tony, Nikolay Borisov, Thomas Gleixner, Ingo Molnar,
	Dave Hansen, x86@kernel.org, H . Peter Anvin, Yazen Ghannam,
	Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

Hi Boris,

> From: Borislav Petkov <bp@alien8.de>
> Sent: Tuesday, April 7, 2026 6:49 AM
> To: Li,Rongqing(ACG CCN) <lirongqing@baidu.com>
> Cc: Luck, Tony <tony.luck@intel.com>; Nikolay Borisov
> <nik.borisov@suse.com>; Thomas Gleixner <tglx@kernel.org>; Ingo Molnar
> <mingo@redhat.com>; Dave Hansen <dave.hansen@linux.intel.com>;
> x86@kernel.org; H . Peter Anvin <hpa@zytor.com>; Yazen Ghannam
> <yazen.ghannam@amd.com>; Zhuo, Qiuxu <qiuxu.zhuo@intel.com>;
> Avadhut Naik <avadhut.naik@amd.com>; linux-kernel@vger.kernel.org; linux-
> edac@vger.kernel.org
> Subject: [PATCH] x86/mce: Restore MCA polling interval halving
> 
> Ok,
> 
> finally. :-\
> 
> Pls run it to make sure it DTRT for you too.
> 
> Thx.
> 
> ---
> From: "Borislav Petkov (AMD)" <bp@alien8.de>
> Date: Mon, 16 Mar 2026 16:12:00 +0100
> Subject: [PATCH] x86/mce: Restore MCA polling interval halving
> 
> RongQing reported that the MCA polling interval doesn't halve when an error
> gets logged. It was traced down to the commit in Fixes: because:
> 
> mce_timer_fn()
> |-> mce_poll_banks()
> |-> machine_check_poll()
> |-> mce_log()
> 
> which will queue the work and return.
> 
> Now, back in mce_timer_fn():
> 
>         /*
>          * Alert userspace if needed. If we logged an MCE, reduce the polling
>          * interval, otherwise increase the polling interval.
>          */
>         if (mce_notify_irq())
> 
> <--- here we haven't run the notifier chain yet so mce_need_notify is not set
> yet so this won't hit and we won't halve the interval iv.
> 
> Now the notifier chain runs. mce_early_notifier() sets the bit, does
> mce_notify_irq(), that clears the bit and then the notifier chain a little later
> logs the error.
> 
> So this is a silly timing issue.
> 
> But, that's all unnecessary.
> 
> All it needs to happen here is, the "should we notify of a logged MCE"
> mce_notify_irq() asks, should be simply a question to the mce gen pool:
> "Are you empty?"
> 
> And that then turns into a simple yes or no answer and it all JustWorks(tm).
> 
> So do that.
> 
> Fixes: 011d82611172 ("RAS: Add a Corrected Errors Collector")
> Reported-by: Li RongQing <lirongqing@baidu.com>
> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
> Link: https://lore.kernel.org/r/20260112082747.2842-1-lirongqing@baidu.com
> ---
>  arch/x86/kernel/cpu/mce/core.c | 7 +------
>  1 file changed, 1 insertion(+), 6 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 8dd424ac5de8..d18db7d8d237 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -90,7 +90,6 @@ struct mca_config mca_cfg __read_mostly = {
>  };
> 
>  static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
> -static unsigned long mce_need_notify;
> 
>  /*
>   * MCA banks polled by the period polling timer for corrected events.
> @@ -595,7 +594,7 @@ static bool mce_notify_irq(void)
>  	/* Not more than two messages every minute */
>  	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
> 
> -	if (test_and_clear_bit(0, &mce_need_notify)) {
> +	if (!mce_gen_pool_empty()) {
>  		mce_work_trigger();
> 
>  		if (__ratelimit(&ratelimit))
> @@ -618,10 +617,6 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
>  	/* Emit the trace record: */
>  	trace_mce_record(err);
> 
> -	set_bit(0, &mce_need_notify);
> -
> -	mce_notify_irq();
> -

I injected a correctable error with the CMCI interrupt enabled on an Intel testing machine, 
and this mce_early_notifier() was invoked.  But the following code in mce_notify_irq() is now
never executed, and I didn't see the error log message "Machine check events logged".

    ...
    mce_work_trigger();

    if (__ratelimit(&ratelimit))
        pr_info(HW_ERR "Machine check events logged\n");

    return true;
    ...

Thanks!
Qiuxu

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-07 15:04                                                 ` Zhuo, Qiuxu
@ 2026-04-14 21:18                                                   ` Borislav Petkov
  2026-04-14 22:22                                                     ` Luck, Tony
  2026-04-15 13:39                                                     ` Zhuo, Qiuxu
  0 siblings, 2 replies; 55+ messages in thread
From: Borislav Petkov @ 2026-04-14 21:18 UTC (permalink / raw)
  To: Zhuo, Qiuxu
  Cc: Li,Rongqing(ACG CCN), Luck, Tony, Nikolay Borisov,
	Thomas Gleixner, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

On Tue, Apr 07, 2026 at 03:04:04PM +0000, Zhuo, Qiuxu wrote:
> I injected a correctable error with the CMCI interrupt enabled on an Intel testing machine, 
> and this mce_early_notifier() was invoked.  But the following code in mce_notify_irq() is now
> never executed, and I didn't see the error log message "Machine check events logged".

You did disable the CEC, right?

In any case, let's have a look:

When we log an MCE, we do:

mce_log			# add it to the genpool and run the works
 -> mce_irq_work
  -> mce_schedule_work
   -> ..
    -> mce_gen_pool_process 	# this'll send it down the notifier chain
     -> x86_mce_decoder_chain
      -> mce_early_notifier 	# that guy sees it here and issues the trace record

Now, mce_notify_irq() would do mce_work_trigger() and issue the printk
- dunno, I guess we still want our printk and probably should add it back
- but the first one - the work triggering - that's mcelog. It is using that
usermode helper gunk, dunno if you guys still need it.

Because mcelog does register to the decoder chain so it'll get to see the MCE
eventually. So that part is fine.

The only question is the usermode helper gunk...

Tony?

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-14 21:18                                                   ` Borislav Petkov
@ 2026-04-14 22:22                                                     ` Luck, Tony
  2026-04-15 19:27                                                       ` Borislav Petkov
  2026-04-15 13:39                                                     ` Zhuo, Qiuxu
  1 sibling, 1 reply; 55+ messages in thread
From: Luck, Tony @ 2026-04-14 22:22 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Zhuo, Qiuxu, Li,Rongqing(ACG CCN), Nikolay Borisov,
	Thomas Gleixner, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

On Tue, Apr 14, 2026 at 11:18:03PM +0200, Borislav Petkov wrote:
> On Tue, Apr 07, 2026 at 03:04:04PM +0000, Zhuo, Qiuxu wrote:
> > I injected a correctable error with the CMCI interrupt enabled on an Intel testing machine, 
> > and this mce_early_notifier() was invoked.  But the following code in mce_notify_irq() is now
> > never executed, and I didn't see the error log message "Machine check events logged".
> 
> You did disable the CEC, right?
> 
> In any case, let's have a look:
> 
> When we log an MCE, we do:
> 
> mce_log			# add it to the genpool and run the works
>  -> mce_irq_work
>   -> mce_schedule_work
>    -> ..
>     -> mce_gen_pool_process 	# this'll send it down the notifier chain
>      -> x86_mce_decoder_chain
>       -> mce_early_notifier 	# that guy sees it here and issues the trace record
> 
> Now, mce_notify_irq() would do mce_work_trigger() and issue the printk
> - dunno, I guess we still want our printk and probably should add it back
> - but the first one - the work triggering - that's mcelog. It is using that
> usermode helper gunk, dunno if you guys still need it.
> 
> Because mcelog does register to the decoder chain so it'll get to see the MCE
> eventually. So that part is fine.
> 
> The only question is the usermode helper gunk...
> 
> Tony?

Ran my own test. RAS_CEC disabled. Booted with mce=no_cmci injected a
corrected error every twenty seconds. Added pr_info() to mce_timer_fn()
to say which CPUs were doubling or halving interval.

Results:

I did see some "Machine check events logged" console messages.

The debug messages are "interesting". Polling timers on CPUs aren't
synchronized, so I got random bursts of debug messages where some
CPUs found an error and halved their interval, while others didn't
see an error and doubled their interval. The machine check banks for
memory corrected errors are socket scoped, so when an error is logged
whichever CPU on the socket polls next will find the error.

Both mcelog and EDAC were invoked on the mce decode chain and logged
errors OK.

When I stopped injecting, all the CPUs doubled back up to maximum
polling interval.

Summary: This is working as well as can be expected given the shared
scope of the machine check banks. If Linux were to understand the
scope of machine check banks it might designate a single CPU in
that scope to do the polling. But Intel doesn't make it easy to derive
the scope. In any case, the common case is CMCI enabled.

-Tony

^ permalink raw reply	[flat|nested] 55+ messages in thread

* RE: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-14 21:18                                                   ` Borislav Petkov
  2026-04-14 22:22                                                     ` Luck, Tony
@ 2026-04-15 13:39                                                     ` Zhuo, Qiuxu
  2026-04-15 19:35                                                       ` Borislav Petkov
  1 sibling, 1 reply; 55+ messages in thread
From: Zhuo, Qiuxu @ 2026-04-15 13:39 UTC (permalink / raw)
  To: Borislav Petkov, Luck, Tony
  Cc: Li,Rongqing(ACG CCN), Nikolay Borisov, Thomas Gleixner,
	Ingo Molnar, Dave Hansen, x86@kernel.org, H . Peter Anvin,
	Yazen Ghannam, Avadhut Naik, linux-kernel@vger.kernel.org,
	linux-edac@vger.kernel.org

Hi Boris,

> From: Borislav Petkov <bp@alien8.de>
> [...]
> Subject: Re: [PATCH] x86/mce: Restore MCA polling interval halving
> 
> On Tue, Apr 07, 2026 at 03:04:04PM +0000, Zhuo, Qiuxu wrote:
> > I injected a correctable error with the CMCI interrupt enabled on an
> > Intel testing machine, and this mce_early_notifier() was invoked.  But
> > the following code in mce_notify_irq() is now never executed, and I didn't
> > see the error log message "Machine check events logged".
> 
> You did disable the CEC, right?

Yes, I disabled the RAS_CEC.

> In any case, let's have a look:
> 
> When we log an MCE, we do:
> 
> mce_log			# add it to the genpool and run the works
>  -> mce_irq_work
>   -> mce_schedule_work
>    -> ..
>     -> mce_gen_pool_process 	# this'll send it down the notifier chain
>      -> x86_mce_decoder_chain
>       -> mce_early_notifier 	# that guy sees it here and issues the trace
> record
> 
> Now, mce_notify_irq() would do mce_work_trigger() and issue the printk
> - dunno, I guess we still want our printk and probably should add it back

This printk is quite useful for checking whether an MCE error event occurs.
Even though it's rate-limited, I'd appreciate keeping it.

> - but the first one - the work triggering - that's mcelog. It is using that
> usermode helper gunk, dunno if you guys still need it.
>

I'm not sure whether any distros still use this user-mode helper to start mcelog or 
mcelog-like tools. @Luck, Tony, could you comment? 😊

> Because mcelog does register to the decoder chain so it'll get to see the MCE
> eventually. So that part is fine.
> 
> The only question is the usermode helper gunk...
> 
> Tony?

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-14 22:22                                                     ` Luck, Tony
@ 2026-04-15 19:27                                                       ` Borislav Petkov
  2026-04-15 19:53                                                         ` Luck, Tony
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-04-15 19:27 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Zhuo, Qiuxu, Li,Rongqing(ACG CCN), Nikolay Borisov,
	Thomas Gleixner, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

On Tue, Apr 14, 2026 at 03:22:23PM -0700, Luck, Tony wrote:
> Ran my own test. RAS_CEC disabled. Booted with mce=no_cmci injected a
> corrected error every twenty seconds. Added pr_info() to mce_timer_fn()
> to say which CPUs were doubling or halving interval.

Right, we still need some sort of a feedback that we've logged an error.

> Results:
> 
> I did see some "Machine check events logged" console messages.

Right, mce_timer_fn().

Not sure that is the right place tho. We want to issue that printk the moment
we log an MCE, perhaps in the early notifier or so, where mce_notify_irq()
was.

> The debug messages are "interesting". Polling timers on CPUs aren't
> synchronized, so I got random bursts of debug messages where some
> CPUs found an error and halved their interval, while others didn't
> see an error and doubled their interval. The machine check banks for
> memory corrected errors are socket scoped, so when an error is logged
> whichever CPU on the socket polls next will find the error.
> 
> Both mcelog and EDAC were invoked on the mce decode chain and logged
> errors OK.
> 
> When I stopped injecting, all the CPUs doubled back up to maximum
> polling interval.
> 
> Summary: This is working as well as can be expected given the shared
> scope of the machine check banks. If Linux were to understand the
> scope of machine check banks it might designate a single CPU in
> that scope to do the polling. But Intel doesn't make it easy to derive
> the scope. In any case, the common case is CMCI enabled.

Thanks for the testing - much appreciated.

One aspect remained unanswered: 

mce_notify_irq -> mce_work_trigger -> schedule_work(&mce_trigger_work); ->
mce_do_trigger -> 

	call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);

Is that thing still used?

If so, what is the use case? Is per-chance that mce_helper the userspace
mcelog tool which the kernel calls here on a MCE?

Or?

Do we need that still?

If not, ripping that out would be a nice, additional simplification.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-15 13:39                                                     ` Zhuo, Qiuxu
@ 2026-04-15 19:35                                                       ` Borislav Petkov
  0 siblings, 0 replies; 55+ messages in thread
From: Borislav Petkov @ 2026-04-15 19:35 UTC (permalink / raw)
  To: Zhuo, Qiuxu
  Cc: Luck, Tony, Li,Rongqing(ACG CCN), Nikolay Borisov,
	Thomas Gleixner, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

On Wed, Apr 15, 2026 at 01:39:06PM +0000, Zhuo, Qiuxu wrote:
> I'm not sure whether any distros still use this user-mode helper to start mcelog or 
> mcelog-like tools. @Luck, Tony, could you comment? 😊

Old distros probably but they don't update to the newest kernel...

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* RE: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-15 19:27                                                       ` Borislav Petkov
@ 2026-04-15 19:53                                                         ` Luck, Tony
  2026-04-15 20:02                                                           ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Luck, Tony @ 2026-04-15 19:53 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Zhuo, Qiuxu, Li,Rongqing(ACG CCN), Nikolay Borisov,
	Thomas Gleixner, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

> One aspect remained unanswered:
>
> mce_notify_irq -> mce_work_trigger -> schedule_work(&mce_trigger_work); ->
> mce_do_trigger ->
>
>       call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT);
>
> Is that thing still used?
>
> If so, what is the use case? Is per-chance that mce_helper the userspace
> mcelog tool which the kernel calls here on a MCE?
>
> Or?
>
> Do we need that still?
>
> If not, ripping that out would be a nice, additional simplification.

It is documented here: https://github.com/andikleen/mcelog?tab=readme-ov-file#readme

Three ways to invoke mcelog (cron job, trigger, daemon). That doc describes
trigger as "a newer method", but git blame shows that is part of the original version of the
README.md from 2016. So "new" some time more than 10 years ago.

That doc recommends daemon mode. Also, there's a geologically slow trend moving
from mcelog to rasdaemon.

Maybe we can drop it, but with the caveat that we'd need to revert if anyone ever
complains about breakage to user interface (to avoid the wrath of Torvalds).

I could update the mcelog document to say trigger mode supported in kernel version
<= 7.0

-Tony

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-15 19:53                                                         ` Luck, Tony
@ 2026-04-15 20:02                                                           ` Borislav Petkov
  2026-04-17 11:50                                                             ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-04-15 20:02 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Zhuo, Qiuxu, Li,Rongqing(ACG CCN), Nikolay Borisov,
	Thomas Gleixner, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

On Wed, Apr 15, 2026 at 07:53:58PM +0000, Luck, Tony wrote:
> It is documented here: https://github.com/andikleen/mcelog?tab=readme-ov-file#readme
> 
> Three ways to invoke mcelog (cron job, trigger, daemon). That doc describes
> trigger as "a newer method", but git blame shows that is part of the original version of the
> README.md from 2016. So "new" some time more than 10 years ago.
> 
> That doc recommends daemon mode. Also, there's a geologically slow trend moving
> from mcelog to rasdaemon.
> 
> Maybe we can drop it, but with the caveat that we'd need to revert if anyone ever
> complains about breakage to user interface (to avoid the wrath of Torvalds).
> 
> I could update the mcelog document to say trigger mode supported in kernel version
> <= 7.0

Or simply deprecate it...

I mean, the savings are nothing earth-shattering to even do the effort here so
pls do whatever's the easiest.

Lemme think about how to restructure this patch of mine...

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-15 20:02                                                           ` Borislav Petkov
@ 2026-04-17 11:50                                                             ` Borislav Petkov
  2026-04-20 14:14                                                               ` Zhuo, Qiuxu
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-04-17 11:50 UTC (permalink / raw)
  To: Luck, Tony, Zhuo, Qiuxu, Nikolay Borisov
  Cc: Li,Rongqing(ACG CCN), Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Yazen Ghannam, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

On Wed, Apr 15, 2026 at 10:02:03PM +0200, Borislav Petkov wrote:
> Lemme think about how to restructure this patch of mine...

Ok, totally untested. This is only to show the idea. I've basically gone and
distributed the functionality where it fits best: the pr_info logging at
mce_log time and the work trigger in the notifier. It ended up like below.

I'll run it but I'd let you folks check it first, whether I've missed an angle
conceptually.

Thx.

---
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 8dd424ac5de8..f3a793e3a6c8 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -90,7 +90,6 @@ struct mca_config mca_cfg __read_mostly = {
 };
 
 static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
-static unsigned long mce_need_notify;
 
 /*
  * MCA banks polled by the period polling timer for corrected events.
@@ -152,8 +151,10 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm);
 
 void mce_log(struct mce_hw_err *err)
 {
-	if (mce_gen_pool_add(err))
+	if (mce_gen_pool_add(err)) {
+		pr_info(HW_ERR "Machine check events logged\n");
 		irq_work_queue(&mce_irq_work);
+	}
 }
 EXPORT_SYMBOL_GPL(mce_log);
 
@@ -585,28 +586,6 @@ bool mce_is_correctable(struct mce *m)
 }
 EXPORT_SYMBOL_GPL(mce_is_correctable);
 
-/*
- * Notify the user(s) about new machine check events.
- * Can be called from interrupt context, but not from machine check/NMI
- * context.
- */
-static bool mce_notify_irq(void)
-{
-	/* Not more than two messages every minute */
-	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
-
-	if (test_and_clear_bit(0, &mce_need_notify)) {
-		mce_work_trigger();
-
-		if (__ratelimit(&ratelimit))
-			pr_info(HW_ERR "Machine check events logged\n");
-
-		return true;
-	}
-
-	return false;
-}
-
 static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
 			      void *data)
 {
@@ -618,9 +597,7 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
 	/* Emit the trace record: */
 	trace_mce_record(err);
 
-	set_bit(0, &mce_need_notify);
-
-	mce_notify_irq();
+	mce_work_trigger();
 
 	return NOTIFY_DONE;
 }
@@ -1804,7 +1781,7 @@ static void mce_timer_fn(struct timer_list *t)
 	 * Alert userspace if needed. If we logged an MCE, reduce the polling
 	 * interval, otherwise increase the polling interval.
 	 */
-	if (mce_notify_irq())
+	if (!mce_gen_pool_empty())
 		iv = max(iv / 2, (unsigned long) HZ/100);
 	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* RE: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-17 11:50                                                             ` Borislav Petkov
@ 2026-04-20 14:14                                                               ` Zhuo, Qiuxu
  2026-04-21 12:05                                                                 ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Zhuo, Qiuxu @ 2026-04-20 14:14 UTC (permalink / raw)
  To: Borislav Petkov, Luck, Tony, Nikolay Borisov
  Cc: Li,Rongqing(ACG CCN), Thomas Gleixner, Ingo Molnar, Dave Hansen,
	x86@kernel.org, H . Peter Anvin, Yazen Ghannam, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

Hi Boris,

> From: Borislav Petkov <bp@alien8.de>
> [...]
> 
> On Wed, Apr 15, 2026 at 10:02:03PM +0200, Borislav Petkov wrote:
> > Lemme think about how to restructure this patch of mine...
> 
> Ok, totally untested. This is only to show the idea. I've basically gone and
> distributed the functionality where it fits best: the pr_info logging at mce_log
> time and the work trigger in the notifier. It ended up like below.
> 
> I'll run it but I'd let you folks check it first, whether I've missed an angle
> conceptually.
> 

1. Test precondition:
     - Added debug messages [1] on top of Boris' patch.
     - RAS_CEC was disabled.
     - A correctable error was injected every 10 seconds.

2. Tested with CMCI interrupts enabled:
   - The message "Machine check events logged" was printed each time a correctable error was injected.
   - EDAC and mcelog in the decode chain were notified as expected.

    So, this part tested OK.

3. Tested in polling mode (boot with "mce=no_cmci"):
   - A CPU’s timer interval was halved after calling mce_log(), or when !mce_gen_pool_empty() was true during polling [2].
   - A CPU’s timer interval was doubled when mce_gen_pool_empty() was true during polling [2].

    This part tested OK, but please see comments below about mce_gen_pool_empty() check in mce_timer_fn().
   
[1]
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index f3a793e3a6c8..927dcdb15ff4 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -152,7 +152,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm);
 void mce_log(struct mce_hw_err *err)
 {
        if (mce_gen_pool_add(err)) {
-               pr_info(HW_ERR "Machine check events logged\n");
+               pr_info(HW_ERR "Machine check events logged by CPU %d\n", smp_processor_id());
                irq_work_queue(&mce_irq_work);
        }
 }
@@ -1781,10 +1781,13 @@ static void mce_timer_fn(struct timer_list *t)
         * Alert userspace if needed. If we logged an MCE, reduce the polling
         * interval, otherwise increase the polling interval.
         */
-       if (!mce_gen_pool_empty())
+       if (!mce_gen_pool_empty()) {
                iv = max(iv / 2, (unsigned long) HZ/100);
-       else
+               pr_info("!mce_gen_pool_empty() - CPU %d halves timer interval %ums\n", smp_processor_id(), jiffies_to_msecs(iv));
+       } else {
                iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+               pr_info(" mce_gen_pool_empty() - CPU %d doubles timer interval %ums\n", smp_processor_id(), jiffies_to_msecs(iv));
+       }

        if (mce_get_storm_mode()) {
                __start_timer(t, HZ);

[2] See example of 'CPU 82':

dmesg | grep -E 'Machine check events logged|CPU 82' | grep -v "EDAC"

[  323.797260] mce: [Hardware Error]: Machine check events logged by CPU 82
[  323.804985] mce: [Hardware Error]: Machine check events logged by CPU 82
[  323.812618] mce: [Hardware Error]: Machine check events logged by CPU 82
[  323.820237] mce: [Hardware Error]: Machine check events logged by CPU 82
[  323.827868] mce: !mce_gen_pool_empty() - CPU 82 halves timer interval 150000ms
[  323.827970] mce: [Hardware Error]: Machine check events logged by CPU 147
[  487.635781] mce: [Hardware Error]: Machine check events logged by CPU 219
[  487.652751] mce: [Hardware Error]: Machine check events logged by CPU 219
[  487.660571] mce: [Hardware Error]: Machine check events logged by CPU 219
[  487.668386] mce: [Hardware Error]: Machine check events logged by CPU 219
[  487.676195] mce: [Hardware Error]: Machine check events logged by CPU 219
[  487.684874] mce: !mce_gen_pool_empty() - CPU 82 halves timer interval 75000ms
[  563.411184] mce: [Hardware Error]: Machine check events logged by CPU 88
[  563.427845] mce: [Hardware Error]: Machine check events logged by CPU 88
[  563.435553] mce: [Hardware Error]: Machine check events logged by CPU 88
[  563.444290] mce: !mce_gen_pool_empty() - CPU 82 halves timer interval 37500ms
[  602.322784] mce: [Hardware Error]: Machine check events logged by CPU 241
[  602.331355] mce: [Hardware Error]: Machine check events logged by CPU 241
[  602.339264] mce: !mce_gen_pool_empty() - CPU 82 halves timer interval 18748ms
[  622.802721] mce: [Hardware Error]: Machine check events logged by CPU 82
[  622.811199] mce: [Hardware Error]: Machine check events logged by CPU 82
[  622.818948] mce: !mce_gen_pool_empty() - CPU 82 halves timer interval 9372ms
[  632.018480] mce: [Hardware Error]: Machine check events logged by CPU 273
[  632.026526] mce: [Hardware Error]: Machine check events logged by CPU 185
[  632.275383] mce:  mce_gen_pool_empty() - CPU 82 doubles timer interval 18744ms
[  647.122282] mce: [Hardware Error]: Machine check events logged by CPU 273
[  651.475854] mce:  mce_gen_pool_empty() - CPU 82 doubles timer interval 37488ms
[  661.970112] mce: [Hardware Error]: Machine check events logged by CPU 273
[  677.073945] mce: [Hardware Error]: Machine check events logged by CPU 273
[  682.193878] mce: [Hardware Error]: Machine check events logged by CPU 273
[  690.386214] mce:  mce_gen_pool_empty() - CPU 82 doubles timer interval 74976ms
[  692.433727] mce: [Hardware Error]: Machine check events logged by CPU 225
[  712.913487] mce: [Hardware Error]: Machine check events logged by CPU 113
[  717.009440] mce: [Hardware Error]: Machine check events logged by CPU 232
[  743.632392] mce: [Hardware Error]: Machine check events logged by CPU 113
[  743.640869] mce: [Hardware Error]: Machine check events logged by CPU 113
[  757.967947] mce: [Hardware Error]: Machine check events logged by CPU 273
[  766.160445] mce:  mce_gen_pool_empty() - CPU 82 doubles timer interval 149952ms
[  768.207807] mce: [Hardware Error]: Machine check events logged by CPU 253
[  792.783453] mce: [Hardware Error]: Machine check events logged by CPU 234
[  807.119257] mce: [Hardware Error]: Machine check events logged by CPU 253
[  817.359155] mce: [Hardware Error]: Machine check events logged by CPU 273
[  831.695030] mce: [Hardware Error]: Machine check events logged by CPU 234
[  852.174749] mce: [Hardware Error]: Machine check events logged by CPU 232
[  861.646550] mce: [Hardware Error]: Machine check events logged by CPU 232
[  866.510500] mce: [Hardware Error]: Machine check events logged by CPU 234
[  884.430286] mce: [Hardware Error]: Machine check events logged by CPU 234
[  899.534081] mce: [Hardware Error]: Machine check events logged by CPU 234
[  904.654067] mce: [Hardware Error]: Machine check events logged by CPU 234
[  922.573822] mce: [Hardware Error]: Machine check events logged by CPU 234
[  929.998246] mce: [Hardware Error]: Machine check events logged by CPU 261
[  930.000003] mce:  mce_gen_pool_empty() - CPU 82 doubles timer interval 299904ms
[  944.333567] mce: [Hardware Error]: Machine check events logged by CPU 232
[  952.525529] mce: [Hardware Error]: Machine check events logged by CPU 273
[  964.813362] mce: [Hardware Error]: Machine check events logged by CPU 232
[  979.149459] mce: [Hardware Error]: Machine check events logged by CPU 225
[  991.436910] mce: [Hardware Error]: Machine check events logged by CPU 273
[ 1005.772632] mce: [Hardware Error]: Machine check events logged by CPU 261
[ 1028.300439] mce: [Hardware Error]: Machine check events logged by CPU 233
[ 1028.300522] mce: [Hardware Error]: Machine check events logged by CPU 233
[ 1044.683195] mce: [Hardware Error]: Machine check events logged by CPU 261
[ 1054.922669] mce: [Hardware Error]: Machine check events logged by CPU 225
[ 1065.162622] mce: [Hardware Error]: Machine check events logged by CPU 261


> Thx.
> 
> ---
> diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
> index 8dd424ac5de8..f3a793e3a6c8 100644
> --- a/arch/x86/kernel/cpu/mce/core.c
> +++ b/arch/x86/kernel/cpu/mce/core.c
> @@ -90,7 +90,6 @@ struct mca_config mca_cfg __read_mostly = {
>  };
> 
>  static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
> -static unsigned long mce_need_notify;
> 
>  /*
>   * MCA banks polled by the period polling timer for corrected events.
> @@ -152,8 +151,10 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm);
> 
>  void mce_log(struct mce_hw_err *err)
>  {
> -	if (mce_gen_pool_add(err))
> +	if (mce_gen_pool_add(err)) {
> +		pr_info(HW_ERR "Machine check events logged\n");
>  		irq_work_queue(&mce_irq_work);
> +	}
>  }
>  EXPORT_SYMBOL_GPL(mce_log);
> 
> @@ -585,28 +586,6 @@ bool mce_is_correctable(struct mce *m)
>  }
>  EXPORT_SYMBOL_GPL(mce_is_correctable);
> 
> -/*
> - * Notify the user(s) about new machine check events.
> - * Can be called from interrupt context, but not from machine check/NMI
> - * context.
> - */
> -static bool mce_notify_irq(void)
> -{
> -	/* Not more than two messages every minute */
> -	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
> -
> -	if (test_and_clear_bit(0, &mce_need_notify)) {
> -		mce_work_trigger();
> -
> -		if (__ratelimit(&ratelimit))
> -			pr_info(HW_ERR "Machine check events logged\n");
> -
> -		return true;
> -	}
> -
> -	return false;
> -}
> -
>  static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
>  			      void *data)
>  {
> @@ -618,9 +597,7 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
>  	/* Emit the trace record: */
>  	trace_mce_record(err);
> 
> -	set_bit(0, &mce_need_notify);
> -
> -	mce_notify_irq();
> +	mce_work_trigger();
> 
>  	return NOTIFY_DONE;
>  }
> @@ -1804,7 +1781,7 @@ static void mce_timer_fn(struct timer_list *t)
>  	 * Alert userspace if needed. If we logged an MCE, reduce the polling
>  	 * interval, otherwise increase the polling interval.
>  	 */
> -	if (mce_notify_irq())
> +	if (!mce_gen_pool_empty())

mce_timer_fn()
  machine_check_poll()
        mce_log()
          irq_work_queue(&mce_irq_work)
            ...
              mce_irq_work_cb()
                mce_schedule_work()
                  schedule_work(&mce_work)
                    ...
                      mce_gen_pool_process() // [3] worker thread concurrently running on any CPU handles MCE logs.

  mce_gen_pool_empty() // [4]

It seems there is a race between [3] and [4].
Although my testing did not observe this race, it's possible 
that mce_timer_fn() (in softirq) completes fast 
enough that it always finishes before [3] (in the worker thread) is scheduled to run.

>  		iv = max(iv / 2, (unsigned long) HZ/100);
>  	else
>  		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
> 

[...]

Thanks!
- Qiuxu

^ permalink raw reply related	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-20 14:14                                                               ` Zhuo, Qiuxu
@ 2026-04-21 12:05                                                                 ` Borislav Petkov
  2026-04-21 15:49                                                                   ` Zhuo, Qiuxu
  0 siblings, 1 reply; 55+ messages in thread
From: Borislav Petkov @ 2026-04-21 12:05 UTC (permalink / raw)
  To: Zhuo, Qiuxu
  Cc: Luck, Tony, Nikolay Borisov, Li,Rongqing(ACG CCN),
	Thomas Gleixner, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

Hi Qiuxu,

On Mon, Apr 20, 2026 at 02:14:52PM +0000, Zhuo, Qiuxu wrote:
> 1. Test precondition:
>      - Added debug messages [1] on top of Boris' patch.
>      - RAS_CEC was disabled.
>      - A correctable error was injected every 10 seconds.
> 
> 2. Tested with CMCI interrupts enabled:
>    - The message "Machine check events logged" was printed each time a correctable error was injected.
>    - EDAC and mcelog in the decode chain were notified as expected.
> 
>     So, this part tested OK.
> 
> 3. Tested in polling mode (boot with "mce=no_cmci"):
>    - A CPU’s timer interval was halved after calling mce_log(), or when !mce_gen_pool_empty() was true during polling [2].
>    - A CPU’s timer interval was doubled when mce_gen_pool_empty() was true during polling [2].
> 
>     This part tested OK, but please see comments below about mce_gen_pool_empty() check in mce_timer_fn().

Thanks for testing.

> mce_timer_fn()
>   machine_check_poll()
>         mce_log()
>           irq_work_queue(&mce_irq_work)
>             ...
>               mce_irq_work_cb()
>                 mce_schedule_work()
>                   schedule_work(&mce_work)
>                     ...
>                       mce_gen_pool_process() // [3] worker thread concurrently running on any CPU handles MCE logs.
> 
>   mce_gen_pool_empty() // [4]
> 
> It seems there is a race between [3] and [4].
> Although my testing did not observe this race, it's possible 
> that mce_timer_fn() (in softirq) completes fast 
> enough that it always finishes before [3] (in worker thread) is scheduled to run.

Does this and the next message in the thread explain the situation?

https://lore.kernel.org/r/20260207115142.GBaYcnTp7maUDVv3Nc@fat_crate.local

Bottom line: I don't think this was ever meant to be anything but a rough and
simple method to catch too many errors being logged and halve the polling
interval.

IOW, even if the above race happens, in the abundance of too many errors, it
would pick up and start halving eventually.

Right?

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply	[flat|nested] 55+ messages in thread

* RE: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-21 12:05                                                                 ` Borislav Petkov
@ 2026-04-21 15:49                                                                   ` Zhuo, Qiuxu
  2026-04-23 12:49                                                                     ` Borislav Petkov
  0 siblings, 1 reply; 55+ messages in thread
From: Zhuo, Qiuxu @ 2026-04-21 15:49 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Luck, Tony, Nikolay Borisov, Li,Rongqing(ACG CCN),
	Thomas Gleixner, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

Hi Boris,

> From: Borislav Petkov <bp@alien8.de>
> [...]
> > mce_timer_fn()
> >   machine_check_poll()
> >         mce_log()
> >           irq_work_queue(&mce_irq_work)
> >             ...
> >               mce_irq_work_cb()
> >                 mce_schedule_work()
> >                   schedule_work(&mce_work)
> >                     ...
> >                       mce_gen_pool_process() // [3] worker thread concurrently running on any CPU handles MCE logs.
> >
> >   mce_gen_pool_empty() // [4]
> >
> > It seems there is a race between [3] and [4].
> > Although my testing did not observe this race, it's possible that
> > mce_timer_fn() (in softirq) completes fast enough that it always
> > finishes before [3] (in worker thread) is scheduled to run.
> 
> Does this and the next message in the thread explain the situation?

Yes. Thanks for pointing out this link.

> https://lore.kernel.org/r/20260207115142.GBaYcnTp7maUDVv3Nc@fat_crate.local
> 
> Bottom line: I don't think this was ever meant to be anything but a rough and
> simple method to catch too many errors being logged and halve the polling
> interval.

Agree.

> IOW, even if the above race happens, in the abundance of too many errors, it
> would pick up and start halving eventually.
> 
> Right?
>

Yes, I think so. 

So, to me, the current rough and simple method for catching frequent error cases is a good
trade-off between accuracy and complexity.

  Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com> 
  Tested-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com> 

Thanks!
-Qiuxu

^ permalink raw reply	[flat|nested] 55+ messages in thread

* Re: [PATCH] x86/mce: Restore MCA polling interval halving
  2026-04-21 15:49                                                                   ` Zhuo, Qiuxu
@ 2026-04-23 12:49                                                                     ` Borislav Petkov
  0 siblings, 0 replies; 55+ messages in thread
From: Borislav Petkov @ 2026-04-23 12:49 UTC (permalink / raw)
  To: Zhuo, Qiuxu
  Cc: Luck, Tony, Nikolay Borisov, Li,Rongqing(ACG CCN),
	Thomas Gleixner, Ingo Molnar, Dave Hansen, x86@kernel.org,
	H . Peter Anvin, Yazen Ghannam, Avadhut Naik,
	linux-kernel@vger.kernel.org, linux-edac@vger.kernel.org

On Tue, Apr 21, 2026 at 03:49:06PM +0000, Zhuo, Qiuxu wrote:
> Yes, I think so. 
> 
> So, to me, the current rough and simple method for catching frequent error cases is a good
> trade-off between accuracy and complexity.
> 
>   Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com> 
>   Tested-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com> 

Thanks!

I have this now - I will hammer on it some more before I queue it.

---
Author: Borislav Petkov (AMD) <bp@alien8.de>
Date:   Mon Mar 16 16:12:00 2026 +0100

    x86/mce: Restore MCA polling interval halving
    
    RongQing reported that the MCA polling interval doesn't halve when an
    error gets logged. It was traced down to the commit in Fixes:, because:
    
      mce_timer_fn()
      |-> mce_poll_banks()
      |-> machine_check_poll()
      |-> mce_log()
    
    which will queue the work and return.
    
    Now, back in mce_timer_fn():
    
            /*
             * Alert userspace if needed. If we logged an MCE, reduce the polling
             * interval, otherwise increase the polling interval.
             */
            if (mce_notify_irq())
    
    <--- here we haven't run the notifier chain yet so mce_need_notify is
    not set yet so this won't hit and we won't halve the interval iv.
    
    Now the notifier chain runs. mce_early_notifier() sets the bit, does
    mce_notify_irq(), that clears the bit and then the notifier chain
    a little later logs the error.
    
    So this is a silly timing issue.
    
    But, that's all unnecessary.
    
    All that needs to happen here is that the "should we notify of a logged
    MCE" question which mce_notify_irq() asks should simply be a question to
    the mce gen pool: "Are you empty?"
    
    And that then turns into a simple yes or no answer and it all
    JustWorks(tm).
    
    So do that and also distribute the functionality where it belongs:
     - Print that MCE events have been logged in mce_log()
     - Trigger the mcelog tool specific work in the first notifier
    
    As a result, mce_notify_irq() can go now.
    
    Fixes: 011d82611172 ("RAS: Add a Corrected Errors Collector")
    Reported-by: Li RongQing <lirongqing@baidu.com>
    Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
    Reviewed-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
    Tested-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
    Link: https://lore.kernel.org/r/20260112082747.2842-1-lirongqing@baidu.com

diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 8dd424ac5de8..f3a793e3a6c8 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -90,7 +90,6 @@ struct mca_config mca_cfg __read_mostly = {
 };
 
 static DEFINE_PER_CPU(struct mce_hw_err, hw_errs_seen);
-static unsigned long mce_need_notify;
 
 /*
  * MCA banks polled by the period polling timer for corrected events.
@@ -152,8 +151,10 @@ EXPORT_PER_CPU_SYMBOL_GPL(injectm);
 
 void mce_log(struct mce_hw_err *err)
 {
-	if (mce_gen_pool_add(err))
+	if (mce_gen_pool_add(err)) {
+		pr_info(HW_ERR "Machine check events logged\n");
 		irq_work_queue(&mce_irq_work);
+	}
 }
 EXPORT_SYMBOL_GPL(mce_log);
 
@@ -585,28 +586,6 @@ bool mce_is_correctable(struct mce *m)
 }
 EXPORT_SYMBOL_GPL(mce_is_correctable);
 
-/*
- * Notify the user(s) about new machine check events.
- * Can be called from interrupt context, but not from machine check/NMI
- * context.
- */
-static bool mce_notify_irq(void)
-{
-	/* Not more than two messages every minute */
-	static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
-
-	if (test_and_clear_bit(0, &mce_need_notify)) {
-		mce_work_trigger();
-
-		if (__ratelimit(&ratelimit))
-			pr_info(HW_ERR "Machine check events logged\n");
-
-		return true;
-	}
-
-	return false;
-}
-
 static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
 			      void *data)
 {
@@ -618,9 +597,7 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val,
 	/* Emit the trace record: */
 	trace_mce_record(err);
 
-	set_bit(0, &mce_need_notify);
-
-	mce_notify_irq();
+	mce_work_trigger();
 
 	return NOTIFY_DONE;
 }
@@ -1804,7 +1781,7 @@ static void mce_timer_fn(struct timer_list *t)
 	 * Alert userspace if needed. If we logged an MCE, reduce the polling
 	 * interval, otherwise increase the polling interval.
 	 */
-	if (mce_notify_irq())
+	if (!mce_gen_pool_empty())
 		iv = max(iv / 2, (unsigned long) HZ/100);
 	else
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));


-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette

^ permalink raw reply related	[flat|nested] 55+ messages in thread

end of thread, other threads:[~2026-04-23 12:50 UTC | newest]

Thread overview: 55+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-01-12  8:27 [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event lirongqing
2026-01-12  8:56 ` Nikolay Borisov
2026-01-12  9:36   ` 答复: [外部邮件] " Li,Rongqing
2026-01-12  9:51     ` Borislav Petkov
2026-01-12 10:24       ` 答复: " Li,Rongqing
2026-01-13  9:51         ` Borislav Petkov
     [not found]           ` <39cfb093256f4da78fe0bc9e814ce5d0@baidu.com>
2026-01-13 12:48             ` 答复: " Borislav Petkov
2026-01-13 18:53               ` Luck, Tony
2026-01-13 18:55                 ` Nikolay Borisov
2026-01-13 19:13                   ` Borislav Petkov
2026-01-13 19:25                     ` Nikolay Borisov
2026-01-13 19:33                       ` Borislav Petkov
2026-01-13 19:37                         ` Nikolay Borisov
2026-01-13 19:44                           ` Borislav Petkov
2026-01-13 19:51                             ` Nikolay Borisov
2026-01-13 20:33                               ` Borislav Petkov
2026-01-13 19:10                 ` Borislav Petkov
2026-01-13 19:31                 ` Nikolay Borisov
2026-01-13 20:30                 ` Thomas Gleixner
2026-01-13 20:56                 ` Borislav Petkov
2026-01-13 21:05                   ` Luck, Tony
2026-01-13 21:31                     ` Borislav Petkov
2026-01-13 22:41                       ` Borislav Petkov
2026-01-14  0:30                         ` Luck, Tony
2026-01-14 13:50                           ` Borislav Petkov
2026-01-14 14:48                             ` Borislav Petkov
2026-02-02 15:18                               ` Borislav Petkov
2026-02-02 23:49                                 ` 答复: [外部邮件] " Li,Rongqing
2026-02-06 22:03                                   ` Borislav Petkov
2026-02-07 11:51                             ` Borislav Petkov
2026-02-09 17:37                               ` Luck, Tony
2026-02-10 15:01                                 ` Borislav Petkov
2026-03-06  7:37                                   ` 答复: [外部邮件] " Li,Rongqing(ACG CCN)
2026-03-06 14:00                                     ` Borislav Petkov
2026-03-06 14:38                                       ` 答复: " Li,Rongqing(ACG CCN)
2026-03-06 15:29                                         ` Borislav Petkov
2026-03-07  1:18                                           ` 答复: " Li,Rongqing(ACG CCN)
2026-03-16 13:44                                             ` Borislav Petkov
2026-04-06 22:49                                               ` [PATCH] x86/mce: Restore MCA polling interval halving Borislav Petkov
2026-04-07 12:51                                                 ` Nikolay Borisov
2026-04-07 15:04                                                 ` Zhuo, Qiuxu
2026-04-14 21:18                                                   ` Borislav Petkov
2026-04-14 22:22                                                     ` Luck, Tony
2026-04-15 19:27                                                       ` Borislav Petkov
2026-04-15 19:53                                                         ` Luck, Tony
2026-04-15 20:02                                                           ` Borislav Petkov
2026-04-17 11:50                                                             ` Borislav Petkov
2026-04-20 14:14                                                               ` Zhuo, Qiuxu
2026-04-21 12:05                                                                 ` Borislav Petkov
2026-04-21 15:49                                                                   ` Zhuo, Qiuxu
2026-04-23 12:49                                                                     ` Borislav Petkov
2026-04-15 13:39                                                     ` Zhuo, Qiuxu
2026-04-15 19:35                                                       ` Borislav Petkov
2026-01-14  6:17                         ` [PATCH] x86/mce: Fix timer interval adjustment after logging a MCE event Nikolay Borisov
2026-01-14 13:52                           ` Borislav Petkov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox