* [PATCH] [0/3] x86: mce: A few more minor cleanups for the x86 mce code
@ 2009-07-11 7:44 Andi Kleen
2009-07-11 7:44 ` [PATCH] [1/3] x86: mce: Move most mce subsystem internal declarations into mce-internal.h Andi Kleen
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Andi Kleen @ 2009-07-11 7:44 UTC (permalink / raw)
To: x86, linux-kernel
Some more cleanups, following up on the previous mce cleanup series.
This doesn't change any code behaviour; it just tidies up the code
a bit.
This applies on top of the previous cleanup series.
-Andi
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] [1/3] x86: mce: Move most mce subsystem internal declarations into mce-internal.h
2009-07-11 7:44 [PATCH] [0/3] x86: mce: A few more minor cleanups for the x86 mce code Andi Kleen
@ 2009-07-11 7:44 ` Andi Kleen
2009-07-11 7:44 ` [PATCH] [2/3] x86: mce: Improve comments in CMCI code Andi Kleen
2009-07-11 7:44 ` [PATCH] [3/3] x86: mce: Improve comments in mce.c Andi Kleen
2 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2009-07-11 7:44 UTC (permalink / raw)
To: x86, linux-kernel
Move MCE subsystem internal prototypes and externs into mce-internal.h.
This way they don't pollute the global include namespace (but
are still global at the linker level).
I didn't move all of them, especially not prototypes that are logically
not internal (like thermal setup).
No code behaviour changes.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/include/asm/mce.h | 48 +++++-------------------------
arch/x86/kernel/cpu/mcheck/mce-inject.c | 2 +
arch/x86/kernel/cpu/mcheck/mce-internal.h | 42 ++++++++++++++++++++++++++
arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +
arch/x86/kernel/cpu/mcheck/mce_intel.c | 1
5 files changed, 56 insertions(+), 39 deletions(-)
Index: linux/arch/x86/include/asm/mce.h
===================================================================
--- linux.orig/arch/x86/include/asm/mce.h
+++ linux/arch/x86/include/asm/mce.h
@@ -106,9 +106,6 @@ struct mce_log {
#include <linux/init.h>
#include <asm/atomic.h>
-extern int mce_disabled;
-extern int mce_p5_enabled;
-
#ifdef CONFIG_X86_MCE
void mcheck_init(struct cpuinfo_x86 *c);
#else
@@ -127,7 +124,6 @@ static inline void enable_p5_mce(void) {
void mce_setup(struct mce *m);
void mce_log(struct mce *m);
-DECLARE_PER_CPU(struct sys_device, mce_dev);
/*
* Maximum banks number.
@@ -136,28 +132,6 @@ DECLARE_PER_CPU(struct sys_device, mce_d
*/
#define MAX_NR_BANKS 32
-#ifdef CONFIG_X86_MCE_INTEL
-extern int mce_cmci_disabled;
-extern int mce_ignore_ce;
-void mce_intel_feature_init(struct cpuinfo_x86 *c);
-void cmci_clear(void);
-void cmci_reenable(void);
-void cmci_rediscover(int dying);
-void cmci_recheck(void);
-#else
-static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
-static inline void cmci_clear(void) {}
-static inline void cmci_reenable(void) {}
-static inline void cmci_rediscover(int dying) {}
-static inline void cmci_recheck(void) {}
-#endif
-
-#ifdef CONFIG_X86_MCE_AMD
-void mce_amd_feature_init(struct cpuinfo_x86 *c);
-#else
-static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
-#endif
-
int mce_available(struct cpuinfo_x86 *c);
DECLARE_PER_CPU(unsigned, mce_exception_count);
@@ -165,22 +139,9 @@ DECLARE_PER_CPU(unsigned, mce_poll_count
extern atomic_t mce_entry;
-typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
-DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
-
-enum mcp_flags {
- MCP_TIMESTAMP = (1 << 0), /* log time stamp */
- MCP_UC = (1 << 1), /* log uncorrected errors */
- MCP_DONTLOG = (1 << 2), /* only clear, don't log */
-};
-void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
-
int mce_notify_irq(void);
void mce_notify_process(void);
-DECLARE_PER_CPU(struct mce, injectm);
-extern struct file_operations mce_chrdev_ops;
-
/*
* Exception handler
*/
@@ -204,5 +165,14 @@ void intel_init_thermal(struct cpuinfo_x
void mce_log_therm_throt_event(__u64 status);
+/*
+ * Intel CMCI
+ */
+#ifdef CONFIG_X86_MCE_INTEL
+void cmci_recheck(void);
+#else
+static inline void cmci_recheck(void) {}
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */
Index: linux/arch/x86/kernel/cpu/mcheck/mce-inject.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -20,6 +20,8 @@
#include <linux/smp.h>
#include <asm/mce.h>
+#include "mce-internal.h"
+
/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
{
Index: linux/arch/x86/kernel/cpu/mcheck/mce-internal.h
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ linux/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,4 +1,5 @@
#include <linux/sysdev.h>
+#include <linux/percpu.h>
#include <asm/mce.h>
enum severity_level {
@@ -24,6 +25,47 @@ struct mce_bank {
int mce_severity(struct mce *a, int tolerant, char **msg);
extern int mce_ser;
+extern int mce_ignore_ce;
extern struct mce_bank *mce_banks;
+extern int mce_disabled;
+extern int mce_p5_enabled;
+
+DECLARE_PER_CPU(struct sys_device, mce_dev);
+
+/*
+ * MCE corrected error support
+ */
+
+typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS);
+DECLARE_PER_CPU(mce_banks_t, mce_poll_banks);
+
+enum mcp_flags {
+ MCP_TIMESTAMP = (1 << 0), /* log time stamp */
+ MCP_UC = (1 << 1), /* log uncorrected errors */
+ MCP_DONTLOG = (1 << 2), /* only clear, don't log */
+};
+void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+
+#ifdef CONFIG_X86_MCE_INTEL
+extern int mce_cmci_disabled;
+void mce_intel_feature_init(struct cpuinfo_x86 *c);
+void cmci_clear(void);
+void cmci_reenable(void);
+void cmci_rediscover(int dying);
+#else
+static inline void mce_intel_feature_init(struct cpuinfo_x86 *c) { }
+static inline void cmci_clear(void) {}
+static inline void cmci_reenable(void) {}
+static inline void cmci_rediscover(int dying) {}
+#endif
+
+#ifdef CONFIG_X86_MCE_AMD
+void mce_amd_feature_init(struct cpuinfo_x86 *c);
+#else
+static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
+#endif
+
+DECLARE_PER_CPU(struct mce, injectm);
+extern struct file_operations mce_chrdev_ops;
Index: linux/arch/x86/kernel/cpu/mcheck/mce_amd.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -30,6 +30,8 @@
#include <asm/mce.h>
#include <asm/msr.h>
+#include "mce-internal.h"
+
#define PFX "mce_threshold: "
#define VERSION "version 1.1.1"
#define NR_BANKS 6
Index: linux/arch/x86/kernel/cpu/mcheck/mce_intel.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -12,6 +12,7 @@
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
+#include "mce-internal.h"
/*
* Support for Intel Correct Machine Check Interrupts. This allows
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] [2/3] x86: mce: Improve comments in CMCI code
2009-07-11 7:44 [PATCH] [0/3] x86: mce: A few more minor cleanups for the x86 mce code Andi Kleen
2009-07-11 7:44 ` [PATCH] [1/3] x86: mce: Move most mce subsystem internal declarations into mce-internal.h Andi Kleen
@ 2009-07-11 7:44 ` Andi Kleen
2009-07-11 7:44 ` [PATCH] [3/3] x86: mce: Improve comments in mce.c Andi Kleen
2 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2009-07-11 7:44 UTC (permalink / raw)
To: x86, linux-kernel
Improve the comments in the CMCI code in mce_intel.c. This documents
some of the design decisions and adds references to the appropriate
manuals.
No code changes.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/mcheck/mce_intel.c | 14 +++++++++++++-
1 file changed, 13 insertions(+), 1 deletion(-)
Index: linux/arch/x86/kernel/cpu/mcheck/mce_intel.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -19,8 +19,16 @@
* the CPU to raise an interrupt when a corrected machine check happened.
* Normally we pick those up using a regular polling timer.
* Also supports reliable discovery of shared banks.
+ *
+ * For reference see the Intel 64 Software Developer's Manual, Volume 3a,
+ * 15.5.2. This code is a relatively faithful implementation of the
+ * recommendations there.
*/
+/*
+ * Ownership of MCE banks per CPU. To avoid duplicated events
+ * for shared banks we assign ownership to specific CPUs.
+ */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
/*
@@ -29,6 +37,10 @@ static DEFINE_PER_CPU(mce_banks_t, mce_b
*/
static DEFINE_SPINLOCK(cmci_discover_lock);
+/*
+ * CMCI threshold in hardware has some drawbacks. We chose to log every event
+ * and hardcode the threshold to 1.
+ */
#define CMCI_THRESHOLD 1
static int cmci_supported(int *banks)
@@ -163,7 +175,7 @@ void cmci_clear(void)
/*
* After a CPU went down cycle through all the others and rediscover
- * Must run in process context.
+ * bank ownership. Must run in process context.
*/
void cmci_rediscover(int dying)
{
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH] [3/3] x86: mce: Improve comments in mce.c
2009-07-11 7:44 [PATCH] [0/3] x86: mce: A few more minor cleanups for the x86 mce code Andi Kleen
2009-07-11 7:44 ` [PATCH] [1/3] x86: mce: Move most mce subsystem internal declarations into mce-internal.h Andi Kleen
2009-07-11 7:44 ` [PATCH] [2/3] x86: mce: Improve comments in CMCI code Andi Kleen
@ 2009-07-11 7:44 ` Andi Kleen
2 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2009-07-11 7:44 UTC (permalink / raw)
To: x86, linux-kernel
- Add references to documentation
- Add a top level comment giving a quick overview.
- Improve a few other comments.
No code changes
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/mcheck/mce.c | 49 ++++++++++++++++++++++++++++++++++++---
1 file changed, 46 insertions(+), 3 deletions(-)
Index: linux/arch/x86/kernel/cpu/mcheck/mce.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/mcheck/mce.c
+++ linux/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1,11 +1,31 @@
/*
- * Machine check handler.
+ * Machine check handler. This handles hardware errors detected by
+ * the CPU.
*
* K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
* Rest from unknown author(s).
* 2004 Andi Kleen. Rewrote most of it.
* Copyright 2008 Intel Corporation
* Author: Andi Kleen
+ *
+ * This code handles both corrected (by hardware) errors and
+ * uncorrected errors. The corrected errors are only logged and
+ * handled by machine_check_poll() et al. The entry point for
+ * uncorrected errors is do_machine_check() which handles the machine
+ * check exception (int 18) raised by the CPU. Uncorrected errors can
+ * either panic or in some special cases be recovered. The logging of
+ * machine check events is done through a special /dev/mcelog
+ * device. Then there is a lot of support code for setting up machine
+ * checks and configuring them.
+ *
+ * References:
+ * Intel 64 Software developer's manual (SDM)
+ * System Programming Guide Volume 3a
+ * Chapter 15 "Machine-check architecture"
+ * You should read that before changing anything.
+ *
+ * Old, outdated paper, but gives a reasonable overview
+ * http://halobates.de/mce.pdf
*/
#include <linux/thread_info.h>
#include <linux/capability.h>
@@ -164,6 +184,11 @@ void mce_log(struct mce *mce)
set_bit(0, &mce_need_notify);
}
+/*
+ * Panic handling. Print machine checks to the console in case of an
+ * unrecoverable error.
+ */
+
static void print_mce(struct mce *m)
{
printk(KERN_EMERG
@@ -260,7 +285,9 @@ static void mce_panic(char *msg, struct
panic(msg);
}
-/* Support code for software error injection */
+/*
+ * Support code for software error injection
+ */
static int msr_to_offset(u32 msr)
{
@@ -409,6 +436,11 @@ asmlinkage void smp_mce_self_interrupt(s
}
#endif
+/*
+ * Schedule further processing of a machine check event after
+ * the exception handler ran. Has to be careful about context because
+ * MCEs run lockless independent from any normal kernel locks.
+ */
static void mce_report_event(struct pt_regs *regs)
{
if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) {
@@ -454,6 +486,9 @@ DEFINE_PER_CPU(unsigned, mce_poll_count)
* Poll for corrected events or events that happened before reset.
* Those are just logged through /dev/mcelog.
*
+ * Either called regularly from a timer, or by special corrected
+ * error interrupts.
+ *
* This is executed in standard interrupt context.
*
* Note: spec recommends to panic for fatal unsignalled
@@ -547,6 +582,10 @@ static int mce_no_way_out(struct mce *m,
}
/*
+ * Support for synchronizing machine checks over all CPUs.
+ */
+
+/*
* Variable to establish order between CPUs while scanning.
* Each CPU spins initially until executing is equal its number.
*/
@@ -1221,7 +1260,11 @@ static void mce_init(void)
}
}
-/* Add per CPU specific workarounds here */
+/*
+ * This function contains workarounds for various machine check
+ * related CPU quirks. Primarily it disables broken machine check
+ * events.
+ */
static void mce_cpu_quirks(struct cpuinfo_x86 *c)
{
/* This should be disabled by the BIOS, but isn't always */
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2009-07-11 7:45 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-11 7:44 [PATCH] [0/3] x86: mce: A few more minor cleanups for the x86 mce code Andi Kleen
2009-07-11 7:44 ` [PATCH] [1/3] x86: mce: Move most mce subsystem internal declarations into mce-internal.h Andi Kleen
2009-07-11 7:44 ` [PATCH] [2/3] x86: mce: Improve comments in CMCI code Andi Kleen
2009-07-11 7:44 ` [PATCH] [3/3] x86: mce: Improve comments in mce.c Andi Kleen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox