* [RESEND PATCH] mce, edac: use an atomic notifier for MCEs decoding
@ 2009-10-07 11:20 Borislav Petkov
2009-10-12 10:33 ` Ingo Molnar
0 siblings, 1 reply; 4+ messages in thread
From: Borislav Petkov @ 2009-10-07 11:20 UTC (permalink / raw)
To: Ingo Molnar; +Cc: x86, linux-kernel
Add an atomic notifier which ensures proper locking when conveying MCE
info to EDAC for decoding. The actual notifier call overrides a default,
negative priority notifier.
Note: make sure we register the default decoder only once since
mcheck_init() runs on each CPU.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
LKML-Reference: <20091003065752.GA8935@liondog.tnic>
---
arch/x86/include/asm/mce.h | 3 ++-
arch/x86/kernel/cpu/mcheck/mce.c | 30 +++++++++++++++++++++---------
drivers/edac/edac_mce_amd.c | 23 ++++++++++++++---------
3 files changed, 37 insertions(+), 19 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index f1363b7..227a72d 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -108,6 +108,8 @@ struct mce_log {
#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
+extern struct atomic_notifier_head x86_mce_decoder_chain;
+
#ifdef __KERNEL__
#include <linux/percpu.h>
@@ -213,6 +215,5 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
void intel_init_thermal(struct cpuinfo_x86 *c);
void mce_log_therm_throt_event(__u64 status);
-
#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b1598a9..09c2e2e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -85,18 +85,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
-static void default_decode_mce(struct mce *m)
+/*
+ * CPU/chipset specific EDAC code can register a notifier call here to print
+ * MCE errors in a human-readable form.
+ */
+ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
+EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
+
+static int default_decode_mce(struct notifier_block *nb, unsigned long val,
+ void *data)
{
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
+
+ return NOTIFY_STOP;
}
-/*
- * CPU/chipset specific EDAC code can register a callback here to print
- * MCE errors in a human-readable form:
- */
-void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
-EXPORT_SYMBOL(x86_mce_decode_callback);
+static struct notifier_block mce_dec_nb = {
+ .notifier_call = default_decode_mce,
+ .priority = -1,
+};
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@@ -204,9 +212,9 @@ static void print_mce(struct mce *m)
/*
* Print out human-readable details about the MCE error,
- * (if the CPU has an implementation for that):
+ * (if the CPU has an implementation for that)
*/
- x86_mce_decode_callback(m);
+ atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
}
static void print_mce_head(void)
@@ -1420,6 +1428,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
mce_cpu_features(c);
mce_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
+
+ if (raw_smp_processor_id() == 0)
+ atomic_notifier_chain_register(&x86_mce_decoder_chain,
+ &mce_dec_nb);
}
/*
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c
index 713ed7d..6ee7529 100644
--- a/drivers/edac/edac_mce_amd.c
+++ b/drivers/edac/edac_mce_amd.c
@@ -3,7 +3,6 @@
static bool report_gart_errors;
static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
-static void (*orig_mce_callback)(struct mce *m);
void amd_report_gart_errors(bool v)
{
@@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec)
pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
}
-static void amd_decode_mce(struct mce *m)
+static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
+ void *data)
{
+ struct mce *m = (struct mce *)data;
struct err_regs regs;
int node, ecc;
@@ -420,20 +421,23 @@ static void amd_decode_mce(struct mce *m)
}
amd_decode_err_code(m->status & 0xffff);
+
+ return NOTIFY_STOP;
}
+static struct notifier_block amd_mce_dec_nb = {
+ .notifier_call = amd_decode_mce,
+};
+
static int __init mce_amd_init(void)
{
/*
* We can decode MCEs for Opteron and later CPUs:
*/
if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
- (boot_cpu_data.x86 >= 0xf)) {
- /* safe the default decode mce callback */
- orig_mce_callback = x86_mce_decode_callback;
-
- x86_mce_decode_callback = amd_decode_mce;
- }
+ (boot_cpu_data.x86 >= 0xf))
+ atomic_notifier_chain_register(&x86_mce_decoder_chain,
+ &amd_mce_dec_nb);
return 0;
}
@@ -442,7 +446,8 @@ early_initcall(mce_amd_init);
#ifdef MODULE
static void __exit mce_amd_exit(void)
{
- x86_mce_decode_callback = orig_mce_callback;
+ atomic_notifier_chain_unregister(&x86_mce_decoder_chain,
+ &amd_mce_dec_nb);
}
MODULE_DESCRIPTION("AMD MCE decoder");
--
1.6.4.3
--
Regards/Gruss,
Boris.
Operating | Advanced Micro Devices GmbH
System | Karl-Hammerschmidt-Str. 34, 85609 Dornach b. München, Germany
Research | Geschäftsführer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
(OSRC) | Registergericht München, HRB Nr. 43632
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [RESEND PATCH] mce, edac: use an atomic notifier for MCEs decoding
2009-10-07 11:20 [RESEND PATCH] mce, edac: use an atomic notifier for MCEs decoding Borislav Petkov
@ 2009-10-12 10:33 ` Ingo Molnar
2009-10-13 15:43 ` [PATCH 1/2] x86, mce: fixup mce naming nomenclature Borislav Petkov
2009-10-13 15:44 ` [PATCH 2/2] x86, mce: Add a global MCE init helper Borislav Petkov
0 siblings, 2 replies; 4+ messages in thread
From: Ingo Molnar @ 2009-10-12 10:33 UTC (permalink / raw)
To: Borislav Petkov; +Cc: linux-kernel, H. Peter Anvin, Thomas Gleixner
* Borislav Petkov <borislav.petkov@amd.com> wrote:
> @@ -1420,6 +1428,10 @@ void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
> mce_cpu_features(c);
> mce_init_timer();
> INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
> +
> + if (raw_smp_processor_id() == 0)
> + atomic_notifier_chain_register(&x86_mce_decoder_chain,
> + &mce_dec_nb);
> }
this bit should be done in a cleaner way, by introducing a boot init
function for MCE functionality.
(It's also broken if we boot on non-0 CPUs. Doesnt happen currently but
still.)
Also, the function names there are pretty messy - please do another,
rename patch that does something like:
mcheck_init() => mcheck_cpu_init()
mce_init() => __mcheck_cpu_init_generict()
mce_cpu_features() => __mcheck_cpu_init_vendor()
mce_init_timer() => __mcheck_cpu_init_timer()
etc.
I've applied your patch to x86/mce - please send delta patches to clean
up the other aspects of it.
Thanks,
Ingo
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/2] x86, mce: fixup mce naming nomenclature
2009-10-12 10:33 ` Ingo Molnar
@ 2009-10-13 15:43 ` Borislav Petkov
2009-10-13 15:44 ` [PATCH 2/2] x86, mce: Add a global MCE init helper Borislav Petkov
1 sibling, 0 replies; 4+ messages in thread
From: Borislav Petkov @ 2009-10-13 15:43 UTC (permalink / raw)
To: Ingo Molnar; +Cc: linux-kernel, H. Peter Anvin, Thomas Gleixner, Andi Kleen
>From 9459278f4cb5b108a4b648d4f079057be203ffee Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Tue, 13 Oct 2009 15:40:17 +0200
Subject: [PATCH 1/2] x86, mce: fixup mce naming nomenclature
Prefix global/setup routines called from outside with "mcheck_" thus
differentiating from the internal/facilities-related "mce_" prefixed
counterparts. Also, prefix the per-cpu calls with mcheck_cpu along with
changing their names to reflect the setup hierarchy of calls better.
There should be no functionality change resulting from this patch.
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
arch/x86/include/asm/mce.h | 4 +-
arch/x86/kernel/cpu/common.c | 2 +-
arch/x86/kernel/cpu/mcheck/mce.c | 52 +++++++++++++++++++-------------------
3 files changed, 29 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 227a72d..161485d 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -120,9 +120,9 @@ extern int mce_disabled;
extern int mce_p5_enabled;
#ifdef CONFIG_X86_MCE
-void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_cpu_init(struct cpuinfo_x86 *c);
#else
-static inline void mcheck_init(struct cpuinfo_x86 *c) {}
+static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cc25c2b..4df69a3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -839,7 +839,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
- mcheck_init(c);
+ mcheck_cpu_init(c);
#endif
select_idle_routine(c);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 15ba9c9..b4a5eca 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1130,7 +1130,7 @@ static int check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
-static void mcheck_timer(unsigned long data)
+static void mce_start_timer(unsigned long data)
{
struct timer_list *t = &per_cpu(mce_timer, data);
int *n;
@@ -1214,7 +1214,7 @@ static int mce_banks_init(void)
/*
* Initialize Machine Checks for a CPU.
*/
-static int __cpuinit mce_cap_init(void)
+static int __cpuinit __mcheck_cpu_cap_init(void)
{
unsigned b;
u64 cap;
@@ -1251,7 +1251,7 @@ static int __cpuinit mce_cap_init(void)
return 0;
}
-static void mce_init(void)
+static void __mcheck_cpu_init_generic(void)
{
mce_banks_t all_banks;
u64 cap;
@@ -1280,7 +1280,7 @@ static void mce_init(void)
}
/* Add per CPU specific workarounds here */
-static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
+static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1348,7 +1348,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
return 0;
}
-static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
+static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
return;
@@ -1362,7 +1362,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
}
}
-static void mce_cpu_features(struct cpuinfo_x86 *c)
+static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
@@ -1376,7 +1376,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
}
}
-static void mce_init_timer(void)
+static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
int *n = &__get_cpu_var(mce_next_interval);
@@ -1387,7 +1387,7 @@ static void mce_init_timer(void)
*n = check_interval * HZ;
if (!*n)
return;
- setup_timer(t, mcheck_timer, smp_processor_id());
+ setup_timer(t, mce_start_timer, smp_processor_id());
t->expires = round_jiffies(jiffies + *n);
add_timer_on(t, smp_processor_id());
}
@@ -1407,26 +1407,26 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
*/
-void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
{
if (mce_disabled)
return;
- mce_ancient_init(c);
+ __mcheck_cpu_ancient_init(c);
if (!mce_available(c))
return;
- if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
+ if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
mce_disabled = 1;
return;
}
machine_check_vector = do_machine_check;
- mce_init();
- mce_cpu_features(c);
- mce_init_timer();
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_vendor(c);
+ __mcheck_cpu_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
if (raw_smp_processor_id() == 0)
@@ -1658,7 +1658,7 @@ __setup("mce", mcheck_enable);
* Disable machine checks on suspend and shutdown. We can't really handle
* them later.
*/
-static int mce_disable(void)
+static int mce_disable_error_reporting(void)
{
int i;
@@ -1673,12 +1673,12 @@ static int mce_disable(void)
static int mce_suspend(struct sys_device *dev, pm_message_t state)
{
- return mce_disable();
+ return mce_disable_error_reporting();
}
static int mce_shutdown(struct sys_device *dev)
{
- return mce_disable();
+ return mce_disable_error_reporting();
}
/*
@@ -1688,8 +1688,8 @@ static int mce_shutdown(struct sys_device *dev)
*/
static int mce_resume(struct sys_device *dev)
{
- mce_init();
- mce_cpu_features(¤t_cpu_data);
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_vendor(¤t_cpu_data);
return 0;
}
@@ -1699,8 +1699,8 @@ static void mce_cpu_restart(void *data)
del_timer_sync(&__get_cpu_var(mce_timer));
if (!mce_available(¤t_cpu_data))
return;
- mce_init();
- mce_init_timer();
+ __mcheck_cpu_init_generic();
+ __mcheck_cpu_init_timer();
}
/* Reinit MCEs after user configuration changes */
@@ -1726,7 +1726,7 @@ static void mce_enable_ce(void *all)
cmci_reenable();
cmci_recheck();
if (all)
- mce_init_timer();
+ __mcheck_cpu_init_timer();
}
static struct sysdev_class mce_sysclass = {
@@ -2035,7 +2035,7 @@ static __init void mce_init_banks(void)
}
}
-static __init int mce_init_device(void)
+static __init int mcheck_init_device(void)
{
int err;
int i = 0;
@@ -2063,7 +2063,7 @@ static __init int mce_init_device(void)
return err;
}
-device_initcall(mce_init_device);
+device_initcall(mcheck_init_device);
/*
* Old style boot options parsing. Only for compatibility.
@@ -2111,7 +2111,7 @@ static int fake_panic_set(void *data, u64 val)
DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
fake_panic_set, "%llu\n");
-static int __init mce_debugfs_init(void)
+static int __init mcheck_debugfs_init(void)
{
struct dentry *dmce, *ffake_panic;
@@ -2125,5 +2125,5 @@ static int __init mce_debugfs_init(void)
return 0;
}
-late_initcall(mce_debugfs_init);
+late_initcall(mcheck_debugfs_init);
#endif
--
1.6.4.3
--
Regards/Gruss,
Boris.
Operating | Advanced Micro Devices GmbH
System | Karl-Hammerschmidt-Str. 34, 85609 Dornach b. München, Germany
Research | Geschäftsführer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
(OSRC) | Registergericht München, HRB Nr. 43632
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/2] x86, mce: Add a global MCE init helper
2009-10-12 10:33 ` Ingo Molnar
2009-10-13 15:43 ` [PATCH 1/2] x86, mce: fixup mce naming nomenclature Borislav Petkov
@ 2009-10-13 15:44 ` Borislav Petkov
1 sibling, 0 replies; 4+ messages in thread
From: Borislav Petkov @ 2009-10-13 15:44 UTC (permalink / raw)
To: Ingo Molnar; +Cc: linux-kernel, H. Peter Anvin, Thomas Gleixner, Andi Kleen
>From a99c2033750588600bc8ff44dbe6d22c1a064238 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Tue, 13 Oct 2009 16:36:34 +0200
Subject: [PATCH 2/2] x86, mce: Add a global MCE init helper
Add an early pre-smp initcall for global MCE functionality setup.
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
arch/x86/kernel/cpu/mcheck/mce.c | 10 ++++++++--
1 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b4a5eca..3f7ac16 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1429,8 +1429,6 @@ void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
__mcheck_cpu_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
- if (raw_smp_processor_id() == 0)
- atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
}
/*
@@ -1650,6 +1648,14 @@ static int __init mcheck_enable(char *str)
}
__setup("mce", mcheck_enable);
+static int __init mcheck_init(void)
+{
+ atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
+
+ return 0;
+}
+early_initcall(mcheck_init);
+
/*
* Sysfs support
*/
--
1.6.4.3
--
Regards/Gruss,
Boris.
Operating | Advanced Micro Devices GmbH
System | Karl-Hammerschmidt-Str. 34, 85609 Dornach b. München, Germany
Research | Geschäftsführer: Andrew Bowd, Thomas M. McCoy, Giuliano Meroni
Center | Sitz: Dornach, Gemeinde Aschheim, Landkreis München
(OSRC) | Registergericht München, HRB Nr. 43632
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2009-10-13 15:44 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-10-07 11:20 [RESEND PATCH] mce, edac: use an atomic notifier for MCEs decoding Borislav Petkov
2009-10-12 10:33 ` Ingo Molnar
2009-10-13 15:43 ` [PATCH 1/2] x86, mce: fixup mce naming nomenclature Borislav Petkov
2009-10-13 15:44 ` [PATCH 2/2] x86, mce: Add a global MCE init helper Borislav Petkov
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.