From: Chen Yucong <slaoub@gmail.com>
To: Borislav Petkov <bp@alien8.de>
Cc: tony.luck@intel.com, linux-edac@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it
Date: Wed, 01 Oct 2014 13:26:04 +0800 [thread overview]
Message-ID: <1412141164.21488.39.camel@debian> (raw)
In-Reply-To: <20140930100940.GD4639@pd.tnic>
[-- Attachment #1: Type: text/plain, Size: 640 bytes --]
On Tue, 2014-09-30 at 12:09 +0200, Borislav Petkov wrote:
>
> Now let me repeat my question: how are you testing your patches?
>
There are no hardware facilities available to me for injecting
MCE errors, so I have to modify the kernel source code to test my
patches.
My method is based on the `mce-injection' mechanism, which is better
suited to Intel processors. So I have replaced rdmsrl/wrmsrl/rdmsr_safe
with mce_rdmsrl/mce_wrmsrl/mce_rdmsr_safe in mce_amd.c. But I use a new
kernel module for error injection instead of writing to /dev/mcelog.
For more detailed information about the testing, please refer to the
attachments.
thx!
cyc
[-- Attachment #2: amd-mce-injection.patch --]
[-- Type: text/x-patch, Size: 5299 bytes --]
diff -uNr amd_inject/linux-3.16.3/arch/x86/include/asm/mce.h linux-3.16.3/arch/x86/include/asm/mce.h
--- amd_inject/linux-3.16.3/arch/x86/include/asm/mce.h 2014-09-18 01:22:16.000000000 +0800
+++ linux-3.16.3/arch/x86/include/asm/mce.h 2014-10-01 09:36:06.302670241 +0800
@@ -166,6 +166,7 @@
#endif
#ifdef CONFIG_X86_MCE_AMD
+void raise_amd_threshold_event(void);
void mce_amd_feature_init(struct cpuinfo_x86 *c);
#else
static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
@@ -185,10 +186,14 @@
MCP_DONTLOG = (1 << 2), /* only clear, don't log */
};
void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+u64 mce_rdmsrl(u32 msr);
+void mce_wrmsrl(u32 msr, u64 v);
+int mce_rdmsr_safe(u32 msr, u32 *low, u32 *high);
int mce_notify_irq(void);
void mce_notify_process(void);
+extern int amd_inject;
DECLARE_PER_CPU(struct mce, injectm);
extern void register_mce_write_callback(ssize_t (*)(struct file *filp,
diff -uNr amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce_amd.c linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce_amd.c
--- amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce_amd.c 2014-09-18 01:22:16.000000000 +0800
+++ linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce_amd.c 2014-10-01 11:09:07.817585622 +0800
@@ -274,6 +274,7 @@
struct mce m;
mce_setup(&m);
+ m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
/* assume first bank caused it */
for (bank = 0; bank < mca_cfg.banks; ++bank) {
@@ -291,7 +292,7 @@
++address;
}
- if (rdmsr_safe(address, &low, &high))
+ if (mce_rdmsr_safe(address, &low, &high))
break;
if (!(high & MASK_VALID_HI)) {
@@ -305,26 +306,35 @@
(high & MASK_LOCKED_HI))
continue;
- /*
- * Log the machine check that caused the threshold
- * event.
- */
- machine_check_poll(MCP_TIMESTAMP,
- &__get_cpu_var(mce_poll_banks));
-
if (high & MASK_OVERFLOW_HI) {
- rdmsrl(address, m.misc);
- rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
- m.status);
+ m.misc = mce_rdmsrl(address);
+ m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + bank * 4);
+ if (m.status & MCI_STATUS_ADDRV)
+ m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + bank * 4);
m.bank = K8_MCE_THRESHOLD_BASE
+ bank * NR_BLOCKS
+ block;
mce_log(&m);
+ mce_wrmsrl(MSR_IA32_MC0_STATUS + bank * 4, 0);
return;
}
}
}
+
+ /*
+ * Log the machine check that caused the threshold
+ * event.
+ */
+ machine_check_poll(MCP_TIMESTAMP,
+ &__get_cpu_var(mce_poll_banks));
+
+}
+
+void raise_amd_threshold_event(void)
+{
+ amd_threshold_interrupt();
}
+EXPORT_SYMBOL_GPL(raise_amd_threshold_event);
/*
* Sysfs Interface
diff -uNr amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce.c linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce.c
--- amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce.c 2014-09-18 01:22:16.000000000 +0800
+++ linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce.c 2014-10-01 09:40:13.269228358 +0800
@@ -48,6 +48,9 @@
#include "mce-internal.h"
+int amd_inject = 0;
+EXPORT_PER_CPU_SYMBOL_GPL(amd_inject);
+
static DEFINE_MUTEX(mce_chrdev_read_mutex);
#define rcu_dereference_check_mce(p) \
@@ -131,6 +134,7 @@
m->apicid = cpu_data(m->extcpu).initial_apicid;
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
}
+EXPORT_SYMBOL_GPL(mce_setup);
DEFINE_PER_CPU(struct mce, injectm);
EXPORT_PER_CPU_SYMBOL_GPL(injectm);
@@ -391,7 +395,7 @@
}
/* MSR access wrappers used for error injection */
-static u64 mce_rdmsrl(u32 msr)
+u64 mce_rdmsrl(u32 msr)
{
u64 v;
@@ -415,8 +419,9 @@
return v;
}
-static void mce_wrmsrl(u32 msr, u64 v)
+void mce_wrmsrl(u32 msr, u64 v)
{
if (__this_cpu_read(injectm.finished)) {
int offset = msr_to_offset(msr);
@@ -427,6 +432,18 @@
}
wrmsrl(msr, v);
}
+
+int mce_rdmsr_safe(u32 msr, u32 *low, u32 *high)
+{
+ u64 __val = mce_rdmsrl(msr);
+
+ (*low) = (u32)__val;
+ (*high) = (u32)(__val >> 32);
+
+ return 0;
+}
/*
* Collect all global (w.r.t. this processor) status about this machine
@@ -1637,6 +1654,7 @@
mce_adjust_timer = mce_intel_adjust_timer;
break;
case X86_VENDOR_AMD:
+ amd_inject = 1;
mce_amd_feature_init(c);
break;
default:
diff -uNr amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce-inject.c linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce-inject.c
--- amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce-inject.c 2014-09-18 01:22:16.000000000 +0800
+++ linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce-inject.c 2014-09-30 22:38:30.138557839 +0800
@@ -54,7 +54,10 @@
memset(&b, 0xff, sizeof(mce_banks_t));
local_irq_save(flags);
- machine_check_poll(0, &b);
+ if (!amd_inject)
+ machine_check_poll(0, &b);
+ else
+ mce_threshold_vector();
local_irq_restore(flags);
m->finished = 0;
}
diff -uNr amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c
--- amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c 2014-09-18 01:22:16.000000000 +0800
+++ linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c 2014-10-01 08:49:06.140738192 +0800
@@ -17,6 +17,7 @@
}
void (*mce_threshold_vector)(void) = default_threshold_interrupt;
+EXPORT_SYMBOL_GPL(mce_threshold_vector);
static inline void __smp_threshold_interrupt(void)
{
[-- Attachment #3: amd_inject.c --]
[-- Type: text/x-csrc, Size: 1613 bytes --]
/*
* Copyright Chen Yucong<slaoub@gmail.com> 2014
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; version 2
* of the License.
*/
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/amd_nb.h>
/*
 * Overflow flag (bit 48) OR-ed into the 64-bit value stored in mce.misc
 * by raise_mce().
 * NOTE(review): presumably the 64-bit counterpart of the MASK_OVERFLOW_HI
 * bit that the threshold handler tests on the high 32-bit word -- confirm
 * against mce_amd.c.
 */
#define MASK_OVERFLOW 0x0001000000000000
/*
 * Publish the fake machine-check record @m into the per-CPU injectm slot
 * of the CPU named by m->extcpu (which need not be the calling CPU).
 *
 * The slot's finished flag is cleared before the copy and set again only
 * after it, with mb() between each step. @m itself is modified: its
 * finished field is zeroed so that the memcpy() below writes 0, not 1,
 * into the slot's flag.
 */
static void inject_mce(struct mce *m)
{
struct mce *i = &per_cpu(injectm, m->extcpu);
/* Make sure no one reads partially written injectm */
i->finished = 0;
mb();
/* Keep the bulk copy below from switching the slot on prematurely */
m->finished = 0;
/* First set the fields after finished */
i->extcpu = m->extcpu;
mb();
/* Now write record in order, finished last (except above) */
memcpy(i, m, sizeof(struct mce));
/* Finally activate it */
mb();
i->finished = 1;
}
static void raise_mce(void)
{
struct mce m;
mce_setup(&m);
m.status = 0X8C00000000000000;
m.misc = 0XC008000000000000 | MASK_OVERFLOW;
//m.misc = 0XC008000000000000;
m.bank = 4;
m.addr = 0xabcdef;
inject_mce(&m);
raise_amd_threshold_event();
}
/*
 * Module entry point: fire a single fake threshold event via raise_mce(),
 * then log that the module has been loaded. Always returns 0.
 */
static int __init amd_inject_init(void)
{
	raise_mce();

	pr_info("amd_inject module loaded ...\n");

	return 0;
}
/*
 * Module exit point: only logs that the module has been unloaded.
 */
static void __exit amd_inject_exit(void)
{
	pr_info("amd_inject module unloaded ...\n");
}
/* Register the load and unload handlers defined above. */
module_init(amd_inject_init);
module_exit(amd_inject_exit);
/*
 * NOTE(review): the remark originally placed here said that unloading
 * cannot be tolerated because not all openers of mce_chrdev are guaranteed
 * to get a reference to this module. That does not match this file: an
 * exit handler is registered above and nothing here registers with
 * mce_chrdev. Presumably carried over from another module -- confirm and
 * drop.
 */
MODULE_LICENSE("GPL");
next prev parent reply other threads:[~2014-10-01 5:26 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-09-23 2:16 [PATCH] x86, MCE, AMD: use macros to compute bank MSRs Chen Yucong
2014-09-23 8:19 ` [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it Chen Yucong
2014-09-28 8:15 ` Chen Yucong
2014-09-29 12:05 ` Borislav Petkov
2014-09-30 0:39 ` Chen Yucong
2014-09-30 7:25 ` Borislav Petkov
2014-09-30 9:56 ` Chen Yucong
2014-09-30 10:09 ` Borislav Petkov
2014-10-01 4:35 ` Chen Yucong
2014-10-02 13:12 ` Borislav Petkov
2014-10-02 14:37 ` Chen Yucong
[not found] ` <CAOjmkp9qQiTbqU3NUhUDAoQAa8wAPJnE_qXbDuBKrA3ee1_APQ@mail.gmail.com>
2014-10-08 21:52 ` Fwd: " Aravind Gopalakrishnan
2014-10-08 22:57 ` Borislav Petkov
2014-10-09 16:53 ` Aravind Gopalakrishnan
2014-10-09 17:35 ` Borislav Petkov
2014-10-09 19:01 ` Aravind Gopalakrishnan
2014-10-21 20:28 ` Borislav Petkov
2014-10-22 1:51 ` Chen Yucong
2014-10-22 8:16 ` Borislav Petkov
2014-10-22 8:53 ` Chen Yucong
2014-10-22 9:30 ` Borislav Petkov
2014-10-29 15:59 ` Aravind Gopalakrishnan
2014-10-30 19:04 ` Aravind Gopalakrishnan
2014-10-30 21:39 ` Borislav Petkov
2014-10-01 5:26 ` Chen Yucong [this message]
2014-10-01 10:10 ` Borislav Petkov
2014-09-28 8:09 ` [PATCH] x86, MCE, AMD: use macros to compute bank MSRs Chen Yucong
2014-09-29 11:48 ` Borislav Petkov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1412141164.21488.39.camel@debian \
--to=slaoub@gmail.com \
--cc=bp@alien8.de \
--cc=linux-edac@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=tony.luck@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox