* [RESEND PATCH] mce, edac: use an atomic notifier for MCEs decoding
@ 2009-10-07 11:20 Borislav Petkov
2009-10-12 10:33 ` Ingo Molnar
0 siblings, 1 reply; 4+ messages in thread
From: Borislav Petkov @ 2009-10-07 11:20 UTC (permalink / raw)
To: Ingo Molnar; +Cc: x86, linux-kernel
Add an atomic notifier which ensures proper locking when conveying MCE
info to EDAC for decoding. The actual notifier call overrides a default,
negative priority notifier.
Note: make sure we register the default decoder only once since
mcheck_init() runs on each CPU.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
LKML-Reference: <20091003065752.GA8935@liondog.tnic>
---
arch/x86/include/asm/mce.h | 3 ++-
arch/x86/kernel/cpu/mcheck/mce.c | 30 +++++++++++++++++++++---------
drivers/edac/edac_mce_amd.c | 23 ++++++++++++++---------
3 files changed, 37 insertions(+), 19 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index f1363b7..227a72d 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -108,6 +108,8 @@ struct mce_log {
#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
+extern struct atomic_notifier_head x86_mce_decoder_chain;
+
#ifdef __KERNEL__
#include <linux/percpu.h>
@@ -213,6 +215,5 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
void intel_init_thermal(struct cpuinfo_x86 *c);
void mce_log_therm_throt_event(__u64 status);
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b1598a9..09c2e2e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -85,18 +85,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
-static void default_decode_mce(struct mce *m)
+/*
+ * CPU/chipset specific EDAC code can register a notifier call here to print
+ * MCE errors in a human-readable form.
+ */
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
+
+static int default_decode_mce(struct notifier_block *nb, unsigned long val,
+ void *data)
{
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+
+ return NOTIFY_STOP;
}
-/*
- * CPU/chipset specific EDAC code can register a callback here to print
- * MCE errors in a human-readable form:
- */
-void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
-EXPORT_SYMBOL(x86_mce_decode_callback);
+static struct notifier_block mce_dec_nb = {
+ .notifier_call = default_decode_mce,
+ .priority = -1,
+};
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -204,9 +212,9 @@ static void print_mce(struct mce *m)
/*
* Print out human-readable details about the MCE error,
- * (if the CPU has an implementation for that):
+ * (if the CPU has an implementation for that)
*/
- x86_mce_decode_callback(m);
+ atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
}
static void print_mce_head(void)
@@ -1420,6 +1428,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
mce_cpu_features(c);
mce_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
+
+ if (raw_smp_processor_id() == 0)
+ atomic_notifier_chain_register(&x86_mce_decoder_chain,
+ &mce_dec_nb);
}
/*
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
index 713ed7d..6ee7529 100644
--- a/drivers/edac/edac_mce_amd.c
+++ b/drivers/edac/edac_mce_amd.c
@@ -3,7 +3,6 @@
static bool report_gart_errors;
static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
-static void (*orig_mce_callback)(struct mce *m);
void amd_report_gart_errors(bool v)
{
@@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec)
pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
}
-static void amd_decode_mce(struct mce *m)
+static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
+ void *data)
{
+ struct mce *m = (struct mce *)data;
struct err_regs regs;
int node, ecc;
@@ -420,20 +421,23 @@ static void amd_decode_mce(struct mce *m)
}
amd_decode_err_code(m->status & 0xffff);
+
+ return NOTIFY_STOP;
}
+static struct notifier_block amd_mce_dec_nb = {
+ .notifier_call = amd_decode_mce,
+};
+
static int __init mce_amd_init(void)
{
/*
* We can decode MCEs for Opteron and later CPUs:
*/
if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
- (boot_cpu_data.x86 >= 0xf)) {
- /* safe the default decode mce callback */
- orig_mce_callback = x86_mce_decode_callback;
-
- x86_mce_decode_callback = amd_decode_mce;
- }
+ (boot_cpu_data.x86 >= 0xf))
+ atomic_notifier_chain_register(&x86_mce_decoder_chain,
+ &amd_mce_dec_nb);
return 0;
}
@@ -442,7 +446,8 @@ early_initcall(mce_amd_init);
#ifdef MODULE
static void __exit mce_amd_exit(void)
{
- x86_mce_decode_callback = orig_mce_callback;
+ atomic_notifier_chain_unregister(&x86_mce_decoder_chain,
+ &amd_mce_dec_nb);
}
MODULE_DESCRIPTION("AMD MCE decoder");
--
1.6.4.3
--
Regards/Gruss,
Boris.
Operating | Advanced Micro Devices GmbH
System | Karl-Hammerschmidt-Str. 34, 85609 Dornach b. München, Germany
Research | Geschäftsführer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
(OSRC) | Registergericht München, HRB Nr. 43632
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RESEND PATCH] mce, edac: use an atomic notifier for MCEs decoding
2009-10-07 11:20 [RESEND PATCH] mce, edac: use an atomic notifier for MCEs decoding Borislav Petkov
@ 2009-10-12 10:33 ` Ingo Molnar
2009-10-13 15:43 ` [PATCH 1/2] x86, mce: fixup mce naming nomenclature Borislav Petkov
2009-10-13 15:44 ` [PATCH 2/2] x86, mce: Add a global MCE init helper Borislav Petkov
0 siblings, 2 replies; 4+ messages in thread
From: Ingo Molnar @ 2009-10-12 10:33 UTC (permalink / raw)
To: Borislav Petkov; +Cc: linux-kernel, H. Peter Anvin, Thomas Gleixner
* Borislav Petkov <borislav.petkov@amd.com> wrote:
> @@ -1420,6 +1428,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
> mce_cpu_features(c);
> mce_init_timer();
> INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
> +
> + if (raw_smp_processor_id() == 0)
> + atomic_notifier_chain_register(&x86_mce_decoder_chain,
> + &mce_dec_nb);
> }
this bit should be done in a cleaner way, by introducing a boot init
function for MCE functionality.
(It's also broken if we boot on non-0 CPUs. Doesnt happen currently but
still.)
Also, the function names there are pretty messy - please do another,
rename patch that does something like:
mcheck_init() => mcheck_cpu_init()
mce_init() => __mcheck_cpu_init_generict()
mce_cpu_features() => __mcheck_cpu_init_vendor()
mce_init_timer() => __mcheck_cpu_init_timer()
etc.
I've applied your patch to x86/mce - please send delta patches to clean
up the other aspects of it.
Thanks,
Ingo
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] x86, mce: fixup mce naming nomenclature
2009-10-12 10:33 ` Ingo Molnar
@ 2009-10-13 15:43 ` Borislav Petkov
2009-10-13 15:44 ` [PATCH 2/2] x86, mce: Add a global MCE init helper Borislav Petkov
1 sibling, 0 replies; 4+ messages in thread
From: Borislav Petkov @ 2009-10-13 15:43 UTC (permalink / raw)
To: Ingo Molnar; +Cc: linux-kernel, H. Peter Anvin, Thomas Gleixner, Andi Kleen
>From 9459278f4cb5b108a4b648d4f079057be203ffee Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Tue, 13 Oct 2009 15:40:17 +0200
Subject: [PATCH 1/2] x86, mce: fixup mce naming nomenclature
Prefix global/setup routines called from outside with "mcheck_" thus
differentiating from the internal/facilities-related "mce_" prefixed
counterparts. Also, prefix the per-cpu calls with mcheck_cpu along with
changing their names to reflect the setup hierarchy of calls better.
There should be no functionality change resulting from this patch.
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
arch/x86/include/asm/mce.h | 4 +-
arch/x86/kernel/cpu/common.c | 2 +-
arch/x86/kernel/cpu/mcheck/mce.c | 52 +++++++++++++++++++-------------------
3 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 227a72d..161485d 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -120,9 +120,9 @@ extern int mce_disabled;
extern int mce_p5_enabled;
#ifdef CONFIG_X86_MCE
-void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_cpu_init(struct cpuinfo_x86 *c);
#else
-static inline void mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cc25c2b..4df69a3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -839,7 +839,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
- mcheck_init(c);
+ mcheck_cpu_init(c);
#endif
select_idle_routine(c);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 15ba9c9..b4a5eca 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1130,7 +1130,7 @@ static int check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
-static void mcheck_timer(unsigned long data)
+static void mce_start_timer(unsigned long data)
{
struct timer_list *t = &per_cpu(mce_timer, data);
int *n;
@@ -1214,7 +1214,7 @@ static int mce_banks_init(void)
/*
* Initialize Machine Checks for a CPU.
*/
-static int __cpuinit mce_cap_init(void)
+static int __cpuinit __mcheck_cpu_cap_init(void)
{
unsigned b;
u64 cap;
@@ -1251,7 +1251,7 @@ static int __cpuinit mce_cap_init(void)
return 0;
}
-static void mce_init(void)
+static void __mcheck_cpu_init_generic(void)
{
mce_banks_t all_banks;
u64 cap;
@@ -1280,7 +1280,7 @@ static void mce_init(void)
}
/* Add per CPU specific workarounds here */
-static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
+static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1348,7 +1348,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
return 0;
}
-static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
+static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
return;
@@ -1362,7 +1362,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
}
}
-static void mce_cpu_features(struct cpuinfo_x86 *c)
+static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
@@ -1376,7 +1376,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
}
}
-static void mce_init_timer(void)
+static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
int *n = &__get_cpu_var(mce_next_interval);
@@ -1387,7 +1387,7 @@ static void mce_init_timer(void)
*n = check_interval * HZ;
if (!*n)
return;
- setup_timer(t, mcheck_timer, smp_processor_id());
+ setup_timer(t, mce_start_timer, smp_processor_id());
t->expires = round_jiffies(jiffies + *n);
add_timer_on(t, smp_processor_id());
}
@@ -1407,26 +1407,26 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
*/
-void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
{
if (mce_disabled)
return;
- mce_ancient_init(c);
+ __mcheck_cpu_ancient_init(c);
if (!mce_available(c))
return;
- if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
+ if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
mce_disabled = 1;
return;
}
machine_check_vector = do_machine_check;
- mce_init();
- mce_cpu_features(c);
- mce_init_timer();
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_vendor(c);
+ __mcheck_cpu_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
if (raw_smp_processor_id() == 0)
@@ -1658,7 +1658,7 @@ __setup("mce", mcheck_enable);
* Disable machine checks on suspend and shutdown. We can't really handle
* them later.
*/
-static int mce_disable(void)
+static int mce_disable_error_reporting(void)
{
int i;
@@ -1673,12 +1673,12 @@ static int mce_disable(void)
static int mce_suspend(struct sys_device *dev, pm_message_t state)
{
- return mce_disable();
+ return mce_disable_error_reporting();
}
static int mce_shutdown(struct sys_device *dev)
{
- return mce_disable();
+ return mce_disable_error_reporting();
}
/*
@@ -1688,8 +1688,8 @@ static int mce_shutdown(struct sys_device *dev)
*/
static int mce_resume(struct sys_device *dev)
{
- mce_init();
- mce_cpu_features(¤t_cpu_data);
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_vendor(¤t_cpu_data);
return 0;
}
@@ -1699,8 +1699,8 @@ static void mce_cpu_restart(void *data)
del_timer_sync(&__get_cpu_var(mce_timer));
if (!mce_available(¤t_cpu_data))
return;
- mce_init();
- mce_init_timer();
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_timer();
}
/* Reinit MCEs after user configuration changes */
@@ -1726,7 +1726,7 @@ static void mce_enable_ce(void *all)
cmci_reenable();
cmci_recheck();
if (all)
- mce_init_timer();
+ __mcheck_cpu_init_timer();
}
static struct sysdev_class mce_sysclass = {
@@ -2035,7 +2035,7 @@ static __init void mce_init_banks(void)
}
}
-static __init int mce_init_device(void)
+static __init int mcheck_init_device(void)
{
int err;
int i = 0;
@@ -2063,7 +2063,7 @@ static __init int mce_init_device(void)
return err;
}
-device_initcall(mce_init_device);
+device_initcall(mcheck_init_device);
/*
* Old style boot options parsing. Only for compatibility.
@@ -2111,7 +2111,7 @@ static int fake_panic_set(void *data, u64 val)
DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
fake_panic_set, "%llu\n");
-static int __init mce_debugfs_init(void)
+static int __init mcheck_debugfs_init(void)
{
struct dentry *dmce, *ffake_panic;
@@ -2125,5 +2125,5 @@ static int __init mce_debugfs_init(void)
return 0;
}
-late_initcall(mce_debugfs_init);
+late_initcall(mcheck_debugfs_init);
#endif
--
1.6.4.3
--
Regards/Gruss,
Boris.
Operating | Advanced Micro Devices GmbH
System | Karl-Hammerschmidt-Str. 34, 85609 Dornach b. München, Germany
Research | Geschäftsführer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
(OSRC) | Registergericht München, HRB Nr. 43632
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] x86, mce: Add a global MCE init helper
2009-10-12 10:33 ` Ingo Molnar
2009-10-13 15:43 ` [PATCH 1/2] x86, mce: fixup mce naming nomenclature Borislav Petkov
@ 2009-10-13 15:44 ` Borislav Petkov
1 sibling, 0 replies; 4+ messages in thread
From: Borislav Petkov @ 2009-10-13 15:44 UTC (permalink / raw)
To: Ingo Molnar; +Cc: linux-kernel, H. Peter Anvin, Thomas Gleixner, Andi Kleen
>From a99c2033750588600bc8ff44dbe6d22c1a064238 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Tue, 13 Oct 2009 16:36:34 +0200
Subject: [PATCH 2/2] x86, mce: Add a global MCE init helper
Add an early pre-smp initcall for global MCE functionality setup.
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
arch/x86/kernel/cpu/mcheck/mce.c | 10 ++++++++--
1 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b4a5eca..3f7ac16 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1429,8 +1429,6 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
- if (raw_smp_processor_id() == 0)
- atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
}
/*
@@ -1650,6 +1648,14 @@ static int __init mcheck_enable(char *str)
}
__setup("mce", mcheck_enable);
+static int __init mcheck_init(void)
+{
+ atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
+
+ return 0;
+}
+early_initcall(mcheck_init);
+
/*
* Sysfs support
*/
--
1.6.4.3
--
Regards/Gruss,
Boris.
Operating | Advanced Micro Devices GmbH
System | Karl-Hammerschmidt-Str. 34, 85609 Dornach b. München, Germany
Research | Geschäftsführer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
(OSRC) | Registergericht München, HRB Nr. 43632
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2009-10-13 15:44 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-10-07 11:20 [RESEND PATCH] mce, edac: use an atomic notifier for MCEs decoding Borislav Petkov
2009-10-12 10:33 ` Ingo Molnar
2009-10-13 15:43 ` [PATCH 1/2] x86, mce: fixup mce naming nomenclature Borislav Petkov
2009-10-13 15:44 ` [PATCH 2/2] x86, mce: Add a global MCE init helper Borislav Petkov
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).