* [PATCH 2/6] HWPOISON: Add code to handle "action required" errors.
2012-01-03 19:49 [PATCH 0/6] x86, mce: machine check recovery for applications Tony Luck
@ 2011-12-13 17:27 ` Tony Luck
2011-12-13 17:48 ` [PATCH 3/6] x86/mce: Create helper function to save addr/misc when needed Tony Luck
` (4 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Tony Luck @ 2011-12-13 17:27 UTC (permalink / raw)
To: linux-kernel
Cc: Ingo Molnar, Borislav Petkov, Chen Gong, Huang, Ying,
Hidetoshi Seto
Add new flag bit "MF_ACTION_REQUIRED" to be used by machine check
code to force a signal with si_code = BUS_MCEERR_AR in the case
where the error occurs in processor execution context. Pass the
flags argument along call chain:
memory_failure()
hwpoison_user_mappings()
kill_procs()
kill_proc()
Drop the "_ao" suffix from kill_procs_ao() and kill_proc_ao() since
they can now handle "action required" as well as "action optional" errors.
Acked-by: Borislav Petkov <bp@amd64.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
include/linux/mm.h | 1 +
mm/memory-failure.c | 50 +++++++++++++++++++++++++++++---------------------
2 files changed, 30 insertions(+), 21 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bcc5234..bf169ca 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1606,6 +1606,7 @@ void vmemmap_populate_print_last(void);
enum mf_flags {
MF_COUNT_INCREASED = 1 << 0,
+ MF_ACTION_REQUIRED = 1 << 1,
};
extern int memory_failure(unsigned long pfn, int trapno, int flags);
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ab259bb..95fd307 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -187,33 +187,40 @@ int hwpoison_filter(struct page *p)
EXPORT_SYMBOL_GPL(hwpoison_filter);
/*
- * Send all the processes who have the page mapped an ``action optional''
- * signal.
+ * Send all the processes who have the page mapped a signal.
+ * ``action optional'' if they are not immediately affected by the error
+ * ``action required'' if error happened in current execution context
*/
-static int kill_proc_ao(struct task_struct *t, unsigned long addr, int trapno,
- unsigned long pfn, struct page *page)
+static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
+ unsigned long pfn, struct page *page, int flags)
{
struct siginfo si;
int ret;
printk(KERN_ERR
- "MCE %#lx: Killing %s:%d early due to hardware memory corruption\n",
+ "MCE %#lx: Killing %s:%d due to hardware memory corruption\n",
pfn, t->comm, t->pid);
si.si_signo = SIGBUS;
si.si_errno = 0;
- si.si_code = BUS_MCEERR_AO;
si.si_addr = (void *)addr;
#ifdef __ARCH_SI_TRAPNO
si.si_trapno = trapno;
#endif
si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT;
- /*
- * Don't use force here, it's convenient if the signal
- * can be temporarily blocked.
- * This could cause a loop when the user sets SIGBUS
- * to SIG_IGN, but hopefully no one will do that?
- */
- ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
+
+ if ((flags & MF_ACTION_REQUIRED) && t == current) {
+ si.si_code = BUS_MCEERR_AR;
+ ret = force_sig_info(SIGBUS, &si, t);
+ } else {
+ /*
+ * Don't use force here, it's convenient if the signal
+ * can be temporarily blocked.
+ * This could cause a loop when the user sets SIGBUS
+ * to SIG_IGN, but hopefully no one will do that?
+ */
+ si.si_code = BUS_MCEERR_AO;
+ ret = send_sig_info(SIGBUS, &si, t); /* synchronous? */
+ }
if (ret < 0)
printk(KERN_INFO "MCE: Error sending signal to %s:%d: %d\n",
t->comm, t->pid, ret);
@@ -338,8 +345,9 @@ static void add_to_kill(struct task_struct *tsk, struct page *p,
* Also when FAIL is set do a force kill because something went
* wrong earlier.
*/
-static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
- int fail, struct page *page, unsigned long pfn)
+static void kill_procs(struct list_head *to_kill, int doit, int trapno,
+ int fail, struct page *page, unsigned long pfn,
+ int flags)
{
struct to_kill *tk, *next;
@@ -363,8 +371,8 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
* check for that, but we need to tell the
* process anyways.
*/
- else if (kill_proc_ao(tk->tsk, tk->addr, trapno,
- pfn, page) < 0)
+ else if (kill_proc(tk->tsk, tk->addr, trapno,
+ pfn, page, flags) < 0)
printk(KERN_ERR
"MCE %#lx: Cannot send advisory machine check signal to %s:%d\n",
pfn, tk->tsk->comm, tk->tsk->pid);
@@ -844,7 +852,7 @@ static int page_action(struct page_state *ps, struct page *p,
* the pages and send SIGBUS to the processes if the data was dirty.
*/
static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
- int trapno)
+ int trapno, int flags)
{
enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
struct address_space *mapping;
@@ -962,8 +970,8 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
* use a more force-full uncatchable kill to prevent
* any accesses to the poisoned memory.
*/
- kill_procs_ao(&tokill, !!PageDirty(ppage), trapno,
- ret != SWAP_SUCCESS, p, pfn);
+ kill_procs(&tokill, !!PageDirty(ppage), trapno,
+ ret != SWAP_SUCCESS, p, pfn, flags);
return ret;
}
@@ -1148,7 +1156,7 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
* Now take care of user space mappings.
* Abort on fail: __delete_from_page_cache() assumes unmapped page.
*/
- if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) {
+ if (hwpoison_user_mappings(p, pfn, trapno, flags) != SWAP_SUCCESS) {
printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
res = -EBUSY;
goto out;
--
1.7.3.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 3/6] x86/mce: Create helper function to save addr/misc when needed
2012-01-03 19:49 [PATCH 0/6] x86, mce: machine check recovery for applications Tony Luck
2011-12-13 17:27 ` [PATCH 2/6] HWPOISON: Add code to handle "action required" errors Tony Luck
@ 2011-12-13 17:48 ` Tony Luck
2011-12-14 23:55 ` [PATCH 4/6] x86/mce: Add mechanism to safely save information in MCE handler Tony Luck
` (3 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Tony Luck @ 2011-12-13 17:48 UTC (permalink / raw)
To: linux-kernel
Cc: Ingo Molnar, Borislav Petkov, Chen Gong, Huang, Ying,
Hidetoshi Seto
The MCI_STATUS_MISCV and MCI_STATUS_ADDRV bits in the bank status
registers define whether the MISC and ADDR registers respectively
contain valid data - provide a helper function to check these bits
and read the registers when needed.
In addition, processors that support software error recovery (as
indicated by the MCG_SER_P bit in the MCG_CAP register) may include
some undefined bits in the ADDR register - mask these out.
Acked-by: Borislav Petkov <bp@amd64.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/mcheck/mce.c | 31 +++++++++++++++++++++++--------
1 files changed, 23 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 1a08ce5..2f1c200 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -492,6 +492,27 @@ static void mce_report_event(struct pt_regs *regs)
irq_work_queue(&__get_cpu_var(mce_irq_work));
}
+/*
+ * Read ADDR and MISC registers.
+ */
+static void mce_read_aux(struct mce *m, int i)
+{
+ if (m->status & MCI_STATUS_MISCV)
+ m->misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
+ if (m->status & MCI_STATUS_ADDRV) {
+ m->addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+
+ /*
+ * Mask the reported address by the reported granularity.
+ */
+ if (mce_ser && (m->status & MCI_STATUS_MISCV)) {
+ u8 shift = MCI_MISC_ADDR_LSB(m->misc);
+ m->addr >>= shift;
+ m->addr <<= shift;
+ }
+ }
+}
+
DEFINE_PER_CPU(unsigned, mce_poll_count);
/*
@@ -542,10 +563,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)))
continue;
- if (m.status & MCI_STATUS_MISCV)
- m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
- if (m.status & MCI_STATUS_ADDRV)
- m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+ mce_read_aux(&m, i);
if (!(flags & MCP_TIMESTAMP))
m.tsc = 0;
@@ -981,10 +999,7 @@ void do_machine_check(struct pt_regs *regs, long error_code)
if (severity == MCE_AR_SEVERITY)
kill_it = 1;
- if (m.status & MCI_STATUS_MISCV)
- m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
- if (m.status & MCI_STATUS_ADDRV)
- m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
+ mce_read_aux(&m, i);
/*
* Action optional error. Queue address for later processing.
--
1.7.3.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 0/6] x86, mce: machine check recovery for applications
@ 2011-12-13 19:05 Tony Luck
0 siblings, 0 replies; 8+ messages in thread
From: Tony Luck @ 2011-12-13 19:05 UTC (permalink / raw)
To: linux-kernel; +Cc: Ingo Molnar, Borislav Petkov, Huang, Ying, Hidetoshi Seto
Yet another version ...
Some bits should look familiar (hopefully pieces that were not too controversial
from earlier versions). Other bits are all new (e.g. part 4/6 which sets up some
functions that can safely save away the physical address of the faulting address
in the machine check handler for later retrieval in a safer execution context).
Tony Luck (6):
HWPOISON: clean up memory_failure() vs. __memory_failure()
HWPOISON: Add code to handle "action required" errors.
x86, mce: create helper function to save addr/misc when needed
x86, mce: Add mechanism to safely save information in MCE handler
x86, mce: handle "action required" errors
x86, mce: Recognise machine check bank signature for data path error
arch/x86/kernel/cpu/mcheck/mce-severity.c | 14 +++-
arch/x86/kernel/cpu/mcheck/mce.c | 153 ++++++++++++++++++++++-------
drivers/base/memory.c | 2 +-
include/linux/mm.h | 4 +-
mm/hwpoison-inject.c | 4 +-
mm/madvise.c | 2 +-
mm/memory-failure.c | 96 ++++++++++---------
7 files changed, 186 insertions(+), 89 deletions(-)
--
1.7.3.1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH 4/6] x86/mce: Add mechanism to safely save information in MCE handler
2012-01-03 19:49 [PATCH 0/6] x86, mce: machine check recovery for applications Tony Luck
2011-12-13 17:27 ` [PATCH 2/6] HWPOISON: Add code to handle "action required" errors Tony Luck
2011-12-13 17:48 ` [PATCH 3/6] x86/mce: Create helper function to save addr/misc when needed Tony Luck
@ 2011-12-14 23:55 ` Tony Luck
2011-12-15 18:48 ` [PATCH 1/6] HWPOISON: Clean up memory_failure() vs. __memory_failure() Tony Luck
` (2 subsequent siblings)
5 siblings, 0 replies; 8+ messages in thread
From: Tony Luck @ 2011-12-14 23:55 UTC (permalink / raw)
To: linux-kernel
Cc: Ingo Molnar, Borislav Petkov, Chen Gong, Huang, Ying,
Hidetoshi Seto
Machine checks on Intel cpus interrupt execution on all cpus, regardless
of interrupt masking. We have a need to save some data about the cause
of the machine check (physical address) in the machine check handler that
can be retrieved later to attempt recovery in a more flexible execution
state.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/mcheck/mce.c | 43 ++++++++++++++++++++++++++++++++++++++
1 files changed, 43 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2f1c200..e1579c5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -887,6 +887,49 @@ static void mce_clear_state(unsigned long *toclear)
}
/*
+ * Need to save faulting physical address associated with a process
+ * in the machine check handler some place where we can grab it back
+ * later in mce_notify_process()
+ */
+#define MCE_INFO_MAX 16
+
+struct mce_info {
+ atomic_t inuse;
+ struct task_struct *t;
+ __u64 paddr;
+} mce_info[MCE_INFO_MAX];
+
+static void mce_save_info(__u64 addr)
+{
+ struct mce_info *mi;
+
+ for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++) {
+ if (atomic_cmpxchg(&mi->inuse, 0, 1) == 0) {
+ mi->t = current;
+ mi->paddr = addr;
+ return;
+ }
+ }
+
+ mce_panic("Too many concurrent recoverable errors", NULL, NULL);
+}
+
+static struct mce_info *mce_find_info(void)
+{
+ struct mce_info *mi;
+
+ for (mi = mce_info; mi < &mce_info[MCE_INFO_MAX]; mi++)
+ if (atomic_read(&mi->inuse) && mi->t == current)
+ return mi;
+ return NULL;
+}
+
+static void mce_clear_info(struct mce_info *mi)
+{
+ atomic_set(&mi->inuse, 0);
+}
+
+/*
* The actual machine check handler. This only handles real
* exceptions when something got corrupted coming in through int 18.
*
--
1.7.3.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 1/6] HWPOISON: Clean up memory_failure() vs. __memory_failure()
2012-01-03 19:49 [PATCH 0/6] x86, mce: machine check recovery for applications Tony Luck
` (2 preceding siblings ...)
2011-12-14 23:55 ` [PATCH 4/6] x86/mce: Add mechanism to safely save information in MCE handler Tony Luck
@ 2011-12-15 18:48 ` Tony Luck
2012-01-03 19:45 ` [PATCH 5/6] x86/mce: Handle "action required" errors Tony Luck
2012-01-03 19:48 ` [PATCH 6/6] x86/mce: Recognise machine check bank signature for data path error Tony Luck
5 siblings, 0 replies; 8+ messages in thread
From: Tony Luck @ 2011-12-15 18:48 UTC (permalink / raw)
To: linux-kernel
Cc: Ingo Molnar, Borislav Petkov, Chen Gong, Huang, Ying,
Hidetoshi Seto
There is only one caller of memory_failure(), all other users call
__memory_failure() and pass in the flags argument explicitly. The
lone user of memory_failure() will soon need to pass flags too.
Add flags argument to the callsite in mce.c. Delete the old memory_failure()
function, and then rename __memory_failure() without the leading "__".
Provide clearer message when action optional memory errors are ignored.
Acked-by: Borislav Petkov <bp@amd64.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/mcheck/mce.c | 12 ++++++---
drivers/base/memory.c | 2 +-
include/linux/mm.h | 3 +-
mm/hwpoison-inject.c | 4 +-
mm/madvise.c | 2 +-
mm/memory-failure.c | 46 +++++++++++++++++--------------------
6 files changed, 34 insertions(+), 35 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 2af127d..1a08ce5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1046,11 +1046,15 @@ out:
}
EXPORT_SYMBOL_GPL(do_machine_check);
-/* dummy to break dependency. actual code is in mm/memory-failure.c */
-void __attribute__((weak)) memory_failure(unsigned long pfn, int vector)
+#ifndef CONFIG_MEMORY_FAILURE
+int memory_failure(unsigned long pfn, int vector, int flags)
{
- printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn);
+ printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
+ "Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);
+
+ return 0;
}
+#endif
/*
* Called after mce notification in process context. This code
@@ -1068,7 +1072,7 @@ void mce_notify_process(void)
unsigned long pfn;
mce_notify_irq();
while (mce_ring_get(&pfn))
- memory_failure(pfn, MCE_VECTOR);
+ memory_failure(pfn, MCE_VECTOR, 0);
}
static void mce_process_work(struct work_struct *dummy)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 8272d92..9a92444 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -474,7 +474,7 @@ store_hard_offline_page(struct class *class,
if (strict_strtoull(buf, 0, &pfn) < 0)
return -EINVAL;
pfn >>= PAGE_SHIFT;
- ret = __memory_failure(pfn, 0, 0);
+ ret = memory_failure(pfn, 0, 0);
return ret ? ret : count;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 4baadd1..bcc5234 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1607,8 +1607,7 @@ void vmemmap_populate_print_last(void);
enum mf_flags {
MF_COUNT_INCREASED = 1 << 0,
};
-extern void memory_failure(unsigned long pfn, int trapno);
-extern int __memory_failure(unsigned long pfn, int trapno, int flags);
+extern int memory_failure(unsigned long pfn, int trapno, int flags);
extern void memory_failure_queue(unsigned long pfn, int trapno, int flags);
extern int unpoison_memory(unsigned long pfn);
extern int sysctl_memory_failure_early_kill;
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index c7fc7fd..cc448bb 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -45,7 +45,7 @@ static int hwpoison_inject(void *data, u64 val)
* do a racy check with elevated page count, to make sure PG_hwpoison
* will only be set for the targeted owner (or on a free page).
* We temporarily take page lock for try_get_mem_cgroup_from_page().
- * __memory_failure() will redo the check reliably inside page lock.
+ * memory_failure() will redo the check reliably inside page lock.
*/
lock_page(hpage);
err = hwpoison_filter(hpage);
@@ -55,7 +55,7 @@ static int hwpoison_inject(void *data, u64 val)
inject:
printk(KERN_INFO "Injecting memory failure at pfn %lx\n", pfn);
- return __memory_failure(pfn, 18, MF_COUNT_INCREASED);
+ return memory_failure(pfn, 18, MF_COUNT_INCREASED);
}
static int hwpoison_unpoison(void *data, u64 val)
diff --git a/mm/madvise.c b/mm/madvise.c
index 74bf193..f5ab745 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -251,7 +251,7 @@ static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
page_to_pfn(p), start);
/* Ignore return value for now */
- __memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
+ memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
}
return ret;
}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 06d3479..ab259bb 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -984,7 +984,25 @@ static void clear_page_hwpoison_huge_page(struct page *hpage)
ClearPageHWPoison(hpage + i);
}
-int __memory_failure(unsigned long pfn, int trapno, int flags)
+/**
+ * memory_failure - Handle memory failure of a page.
+ * @pfn: Page Number of the corrupted page
+ * @trapno: Trap number reported in the signal to user space.
+ * @flags: fine tune action taken
+ *
+ * This function is called by the low level machine check code
+ * of an architecture when it detects hardware memory corruption
+ * of a page. It tries its best to recover, which includes
+ * dropping pages, killing processes etc.
+ *
+ * The function is primarily of use for corruptions that
+ * happen outside the current execution context (e.g. when
+ * detected by a background scrubber)
+ *
+ * Must run in process context (e.g. a work queue) with interrupts
+ * enabled and no spinlocks hold.
+ */
+int memory_failure(unsigned long pfn, int trapno, int flags)
{
struct page_state *ps;
struct page *p;
@@ -1156,29 +1174,7 @@ out:
unlock_page(hpage);
return res;
}
-EXPORT_SYMBOL_GPL(__memory_failure);
-
-/**
- * memory_failure - Handle memory failure of a page.
- * @pfn: Page Number of the corrupted page
- * @trapno: Trap number reported in the signal to user space.
- *
- * This function is called by the low level machine check code
- * of an architecture when it detects hardware memory corruption
- * of a page. It tries its best to recover, which includes
- * dropping pages, killing processes etc.
- *
- * The function is primarily of use for corruptions that
- * happen outside the current execution context (e.g. when
- * detected by a background scrubber)
- *
- * Must run in process context (e.g. a work queue) with interrupts
- * enabled and no spinlocks hold.
- */
-void memory_failure(unsigned long pfn, int trapno)
-{
- __memory_failure(pfn, trapno, 0);
-}
+EXPORT_SYMBOL_GPL(memory_failure);
#define MEMORY_FAILURE_FIFO_ORDER 4
#define MEMORY_FAILURE_FIFO_SIZE (1 << MEMORY_FAILURE_FIFO_ORDER)
@@ -1251,7 +1247,7 @@ static void memory_failure_work_func(struct work_struct *work)
spin_unlock_irqrestore(&mf_cpu->lock, proc_flags);
if (!gotten)
break;
- __memory_failure(entry.pfn, entry.trapno, entry.flags);
+ memory_failure(entry.pfn, entry.trapno, entry.flags);
}
}
--
1.7.3.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 5/6] x86/mce: Handle "action required" errors
2012-01-03 19:49 [PATCH 0/6] x86, mce: machine check recovery for applications Tony Luck
` (3 preceding siblings ...)
2011-12-15 18:48 ` [PATCH 1/6] HWPOISON: Clean up memory_failure() vs. __memory_failure() Tony Luck
@ 2012-01-03 19:45 ` Tony Luck
2012-01-03 19:48 ` [PATCH 6/6] x86/mce: Recognise machine check bank signature for data path error Tony Luck
5 siblings, 0 replies; 8+ messages in thread
From: Tony Luck @ 2012-01-03 19:45 UTC (permalink / raw)
To: linux-kernel
Cc: Ingo Molnar, Borislav Petkov, Chen Gong, Huang, Ying,
Hidetoshi Seto
All non-urgent actions (reporting low severity errors and handling
"action-optional" errors) are now handled by a work queue. This
means that TIF_MCE_NOTIFY can be used to block execution for a
thread experiencing an "action-required" fault until we get all
cpus out of the machine check handler (and the thread that hit
the fault into mce_notify_process().
We use the new mce_{save,find,clear}_info() API to get information
from do_machine_check() to mce_notify_process(), and then use the
newly improved memory_failure(..., MF_ACTION_REQUIRED) to handle
the error (possibly signalling the process).
Update some comments to make the new code flows clearer.
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/mcheck/mce.c | 95 +++++++++++++++++++++-----------------
1 files changed, 53 insertions(+), 42 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index e1579c5..56e4e79 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -982,7 +982,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
barrier();
/*
- * When no restart IP must always kill or panic.
+ * When no restart IP might need to kill or panic.
+ * Assume the worst for now, but if we find the
+ * severity is MCE_AR_SEVERITY we have other options.
*/
if (!(m.mcgstatus & MCG_STATUS_RIPV))
kill_it = 1;
@@ -1036,12 +1038,6 @@ void do_machine_check(struct pt_regs *regs, long error_code)
continue;
}
- /*
- * Kill on action required.
- */
- if (severity == MCE_AR_SEVERITY)
- kill_it = 1;
-
mce_read_aux(&m, i);
/*
@@ -1062,6 +1058,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
}
}
+ /* mce_clear_state will clear *final, save locally for use later */
+ m = *final;
+
if (!no_way_out)
mce_clear_state(toclear);
@@ -1073,27 +1072,22 @@ void do_machine_check(struct pt_regs *regs, long error_code)
no_way_out = worst >= MCE_PANIC_SEVERITY;
/*
- * If we have decided that we just CAN'T continue, and the user
- * has not set tolerant to an insane level, give up and die.
- *
- * This is mainly used in the case when the system doesn't
- * support MCE broadcasting or it has been disabled.
+ * At insane "tolerant" levels we take no action. Otherwise
+ * we only die if we have no other choice. For less serious
+ * issues we try to recover, or limit damage to the current
+ * process.
*/
- if (no_way_out && tolerant < 3)
- mce_panic("Fatal machine check on current CPU", final, msg);
-
- /*
- * If the error seems to be unrecoverable, something should be
- * done. Try to kill as little as possible. If we can kill just
- * one task, do that. If the user has set the tolerance very
- * high, don't try to do anything at all.
- */
-
- if (kill_it && tolerant < 3)
- force_sig(SIGBUS, current);
-
- /* notify userspace ASAP */
- set_thread_flag(TIF_MCE_NOTIFY);
+ if (tolerant < 3) {
+ if (no_way_out)
+ mce_panic("Fatal machine check on current CPU", &m, msg);
+ if (worst == MCE_AR_SEVERITY) {
+ /* schedule action before return to userland */
+ mce_save_info(m.addr);
+ set_thread_flag(TIF_MCE_NOTIFY);
+ } else if (kill_it) {
+ force_sig(SIGBUS, current);
+ }
+ }
if (worst > 0)
mce_report_event(regs);
@@ -1107,6 +1101,8 @@ EXPORT_SYMBOL_GPL(do_machine_check);
#ifndef CONFIG_MEMORY_FAILURE
int memory_failure(unsigned long pfn, int vector, int flags)
{
+ /* mce_severity() should not hand us an ACTION_REQUIRED error */
+ BUG_ON(flags & MF_ACTION_REQUIRED);
printk(KERN_ERR "Uncorrected memory error in page 0x%lx ignored\n"
"Rebuild kernel with CONFIG_MEMORY_FAILURE=y for smarter handling\n", pfn);
@@ -1115,27 +1111,44 @@ int memory_failure(unsigned long pfn, int vector, int flags)
#endif
/*
- * Called after mce notification in process context. This code
- * is allowed to sleep. Call the high level VM handler to process
- * any corrupted pages.
- * Assume that the work queue code only calls this one at a time
- * per CPU.
- * Note we don't disable preemption, so this code might run on the wrong
- * CPU. In this case the event is picked up by the scheduled work queue.
- * This is merely a fast path to expedite processing in some common
- * cases.
+ * Called in process context that interrupted by MCE and marked with
+ * TIF_MCE_NOTIFY, just before returning to erroneous userland.
+ * This code is allowed to sleep.
+ * Attempt possible recovery such as calling the high level VM handler to
+ * process any corrupted pages, and kill/signal current process if required.
+ * Action required errors are handled here.
*/
void mce_notify_process(void)
{
unsigned long pfn;
- mce_notify_irq();
- while (mce_ring_get(&pfn))
- memory_failure(pfn, MCE_VECTOR, 0);
+ struct mce_info *mi = mce_find_info();
+
+ if (!mi)
+ mce_panic("Lost physical address for unconsumed uncorrectable error", NULL, NULL);
+ pfn = mi->paddr >> PAGE_SHIFT;
+
+ clear_thread_flag(TIF_MCE_NOTIFY);
+
+ pr_err("Uncorrected hardware memory error in user-access at %llx",
+ mi->paddr);
+ if (memory_failure(pfn, MCE_VECTOR, MF_ACTION_REQUIRED) < 0) {
+ pr_err("Memory error not recovered");
+ force_sig(SIGBUS, current);
+ }
+ mce_clear_info(mi);
}
+/*
+ * Action optional processing happens here (picking up
+ * from the list of faulting pages that do_machine_check()
+ * placed into the "ring").
+ */
static void mce_process_work(struct work_struct *dummy)
{
- mce_notify_process();
+ unsigned long pfn;
+
+ while (mce_ring_get(&pfn))
+ memory_failure(pfn, MCE_VECTOR, 0);
}
#ifdef CONFIG_X86_MCE_INTEL
@@ -1225,8 +1238,6 @@ int mce_notify_irq(void)
/* Not more than two messages every minute */
static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2);
- clear_thread_flag(TIF_MCE_NOTIFY);
-
if (test_and_clear_bit(0, &mce_need_notify)) {
/* wake processes polling /dev/mcelog */
wake_up_interruptible(&mce_chrdev_wait);
--
1.7.3.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 6/6] x86/mce: Recognise machine check bank signature for data path error
2012-01-03 19:49 [PATCH 0/6] x86, mce: machine check recovery for applications Tony Luck
` (4 preceding siblings ...)
2012-01-03 19:45 ` [PATCH 5/6] x86/mce: Handle "action required" errors Tony Luck
@ 2012-01-03 19:48 ` Tony Luck
5 siblings, 0 replies; 8+ messages in thread
From: Tony Luck @ 2012-01-03 19:48 UTC (permalink / raw)
To: linux-kernel
Cc: Ingo Molnar, Borislav Petkov, Chen Gong, Huang, Ying,
Hidetoshi Seto
Action required data path signature is defined in table 15-19 of SDM:
+-----------------------------------------------------------------------------+
| SRAR Error | Valid | OVER | UC | EN | MISCV | ADDRV | PCC | S | AR | MCACOD |
| Data Load | 1 | 0 | 1 | 1 | 1 | 1 | 0 | 1 | 1 | 0x134 |
+-----------------------------------------------------------------------------+
Recognise this, and pass MCE_AR_SEVERITY code back to do_machine_check() if
we have the action handler configured (CONFIG_MEMORY_FAILURE=y)
Acked-by: Borislav Petkov <bp@amd64.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
arch/x86/kernel/cpu/mcheck/mce-severity.c | 16 +++++++++++++++-
1 files changed, 15 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 7395d5f..f6c92f9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -54,6 +54,7 @@ static struct severity {
#define MASK(x, y) .mask = x, .result = y
#define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S)
#define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR)
+#define MCI_ADDR (MCI_STATUS_ADDRV|MCI_STATUS_MISCV)
#define MCACOD 0xffff
MCESEV(
@@ -102,11 +103,24 @@ static struct severity {
SER, BITCLR(MCI_STATUS_S)
),
- /* AR add known MCACODs here */
MCESEV(
PANIC, "Action required with lost events",
SER, BITSET(MCI_STATUS_OVER|MCI_UC_SAR)
),
+
+ /* known AR MCACODs: */
+#ifdef CONFIG_MEMORY_FAILURE
+ MCESEV(
+ KEEP, "HT thread notices Action required: data load error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|0x0134),
+ MCGMASK(MCG_STATUS_EIPV, 0)
+ ),
+ MCESEV(
+ AR, "Action required: data load error",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|0x0134),
+ USER
+ ),
+#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_SAR)
--
1.7.3.1
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 0/6] x86, mce: machine check recovery for applications
@ 2012-01-03 19:49 Tony Luck
2011-12-13 17:27 ` [PATCH 2/6] HWPOISON: Add code to handle "action required" errors Tony Luck
` (5 more replies)
0 siblings, 6 replies; 8+ messages in thread
From: Tony Luck @ 2012-01-03 19:49 UTC (permalink / raw)
To: linux-kernel
Cc: Ingo Molnar, Borislav Petkov, Chen Gong, Huang, Ying,
Hidetoshi Seto
This series adds code to recognise the machine check signature for
a recoverable error in the data path (Advanced SKUs of "Sandy Bridge"
server processors are the first to be able to allow s/w recovery for
this case), save the required information in the machine check handler
and then call to the generic memory_failure() code to try for
graceful error recovery (sending SIGBUS to affected process(es)).
Updates since last version (December 15th)
Part1-4: unchanged
Part5: Changed stub function for CONFIG_MEMORY_FAILURE=n case to BUG_ON
if it is handed an MF_ACTION_REQUIRED case (this indicates an error in
severity calculation). Drop "Memory error recovered" message (enough
chatter already).
Part6: Only pass back an ACTION_REQUIRED severity to a kernel if it is built
with CONFIG_MEMORY_FAILURE=y (i.e. has the code to take the action).
Whole series is available in:
git://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git mce-recovery
Tony Luck (6):
HWPOISON: clean up memory_failure() vs. __memory_failure()
HWPOISON: Add code to handle "action required" errors.
x86, mce: create helper function to save addr/misc when needed
x86, mce: Add mechanism to safely save information in MCE handler
x86, mce: handle "action required" errors
x86, mce: Recognise machine check bank signature for data path error
arch/x86/kernel/cpu/mcheck/mce-severity.c | 16 +++-
arch/x86/kernel/cpu/mcheck/mce.c | 179 ++++++++++++++++++++---------
drivers/base/memory.c | 2 +-
include/linux/mm.h | 4 +-
mm/hwpoison-inject.c | 4 +-
mm/madvise.c | 2 +-
mm/memory-failure.c | 96 ++++++++--------
7 files changed, 197 insertions(+), 106 deletions(-)
--
1.7.3.1
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2012-01-03 20:21 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-01-03 19:49 [PATCH 0/6] x86, mce: machine check recovery for applications Tony Luck
2011-12-13 17:27 ` [PATCH 2/6] HWPOISON: Add code to handle "action required" errors Tony Luck
2011-12-13 17:48 ` [PATCH 3/6] x86/mce: Create helper function to save addr/misc when needed Tony Luck
2011-12-14 23:55 ` [PATCH 4/6] x86/mce: Add mechanism to safely save information in MCE handler Tony Luck
2011-12-15 18:48 ` [PATCH 1/6] HWPOISON: Clean up memory_failure() vs. __memory_failure() Tony Luck
2012-01-03 19:45 ` [PATCH 5/6] x86/mce: Handle "action required" errors Tony Luck
2012-01-03 19:48 ` [PATCH 6/6] x86/mce: Recognise machine check bank signature for data path error Tony Luck
-- strict thread matches above, loose matches on Subject: below --
2011-12-13 19:05 [PATCH 0/6] x86, mce: machine check recovery for applications Tony Luck
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.