public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Chen Yucong <slaoub@gmail.com>
To: Borislav Petkov <bp@alien8.de>
Cc: tony.luck@intel.com, linux-edac@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it
Date: Wed, 01 Oct 2014 13:26:04 +0800	[thread overview]
Message-ID: <1412141164.21488.39.camel@debian> (raw)
In-Reply-To: <20140930100940.GD4639@pd.tnic>

[-- Attachment #1: Type: text/plain, Size: 640 bytes --]

On Tue, 2014-09-30 at 12:09 +0200, Borislav Petkov wrote:
> 
> Now let me repeat my question: how are you testing your patches?
> 
There are no any hardware facilities that can help me to inject some
MCE errors. So I have to modify the kernel source code for testing my
patches.

My method is based on the `mce-injection' that is better suited to 
Intel processors. So I have replaced rdmsrl/wrmsrl/rdmsr_safe with
mce_rdmsrl/mce_wrmsrl/mce_rdmsr_safe in mce_amd.c. But I use a new
kernel module for error injection instead of writing /dev/mcelog.

For more detailed information about testing, you can refer the 
attachments.

thx!
cyc 
  

[-- Attachment #2: amd-mce-injection.patch --]
[-- Type: text/x-patch, Size: 5299 bytes --]

diff -uNr amd_inject/linux-3.16.3/arch/x86/include/asm/mce.h linux-3.16.3/arch/x86/include/asm/mce.h
--- amd_inject/linux-3.16.3/arch/x86/include/asm/mce.h	2014-09-18 01:22:16.000000000 +0800
+++ linux-3.16.3/arch/x86/include/asm/mce.h	2014-10-01 09:36:06.302670241 +0800
@@ -166,6 +166,7 @@
 #endif
 
 #ifdef CONFIG_X86_MCE_AMD
+void raise_amd_threshold_event(void);
 void mce_amd_feature_init(struct cpuinfo_x86 *c);
 #else
 static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { }
@@ -185,10 +186,14 @@
 	MCP_DONTLOG = (1 << 2),		/* only clear, don't log */
 };
 void machine_check_poll(enum mcp_flags flags, mce_banks_t *b);
+u64 mce_rdmsrl(u32 msr);
+void mce_wrmsrl(u32 msr, u64 v);
+int mce_rdmsr_safe(u32 msr, u32 *low, u32 *high);
 
 int mce_notify_irq(void);
 void mce_notify_process(void);
 
+extern int amd_inject;
 DECLARE_PER_CPU(struct mce, injectm);
 
 extern void register_mce_write_callback(ssize_t (*)(struct file *filp,
diff -uNr amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce_amd.c linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce_amd.c
--- amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce_amd.c	2014-09-18 01:22:16.000000000 +0800
+++ linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce_amd.c	2014-10-01 11:09:07.817585622 +0800
@@ -274,6 +274,7 @@
 	struct mce m;
 
 	mce_setup(&m);
+	m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
 
 	/* assume first bank caused it */
 	for (bank = 0; bank < mca_cfg.banks; ++bank) {
@@ -291,7 +292,7 @@
 				++address;
 			}
 
-			if (rdmsr_safe(address, &low, &high))
+			if (mce_rdmsr_safe(address, &low, &high))
 				break;
 
 			if (!(high & MASK_VALID_HI)) {
@@ -305,26 +306,35 @@
 			     (high & MASK_LOCKED_HI))
 				continue;
 
-			/*
-			 * Log the machine check that caused the threshold
-			 * event.
-			 */
-			machine_check_poll(MCP_TIMESTAMP,
-					&__get_cpu_var(mce_poll_banks));
-
 			if (high & MASK_OVERFLOW_HI) {
-				rdmsrl(address, m.misc);
-				rdmsrl(MSR_IA32_MC0_STATUS + bank * 4,
-				       m.status);
+				m.misc = mce_rdmsrl(address);
+				m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + bank * 4);
+				if (m.status & MCI_STATUS_ADDRV)
+					m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + bank * 4);
 				m.bank = K8_MCE_THRESHOLD_BASE
 				       + bank * NR_BLOCKS
 				       + block;
 				mce_log(&m);
+				mce_wrmsrl(MSR_IA32_MC0_STATUS + bank * 4, 0);
 				return;
 			}
 		}
 	}
+
+	/*
+	 * Log the machine check that caused the threshold
+	 * event.
+	 */
+	machine_check_poll(MCP_TIMESTAMP,
+				&__get_cpu_var(mce_poll_banks));
+
+}
+
+void raise_amd_threshold_event(void)
+{
+	amd_threshold_interrupt();
 }
+EXPORT_SYMBOL_GPL(raise_amd_threshold_event);
 
 /*
  * Sysfs Interface
diff -uNr amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce.c linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce.c
--- amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce.c	2014-09-18 01:22:16.000000000 +0800
+++ linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce.c	2014-10-01 09:40:13.269228358 +0800
@@ -48,6 +48,9 @@
 
 #include "mce-internal.h"
 
+int amd_inject = 0;
+EXPORT_PER_CPU_SYMBOL_GPL(amd_inject);
+
 static DEFINE_MUTEX(mce_chrdev_read_mutex);
 
 #define rcu_dereference_check_mce(p) \
@@ -131,6 +134,7 @@
 	m->apicid = cpu_data(m->extcpu).initial_apicid;
 	rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
 }
+EXPORT_SYMBOL_GPL(mce_setup);
 
 DEFINE_PER_CPU(struct mce, injectm);
 EXPORT_PER_CPU_SYMBOL_GPL(injectm);
@@ -391,7 +395,7 @@
 }
 
 /* MSR access wrappers used for error injection */
-static u64 mce_rdmsrl(u32 msr)
+u64 mce_rdmsrl(u32 msr)
 {
 	u64 v;
 
@@ -415,8 +419,9 @@
 
 	return v;
 }
 
-static void mce_wrmsrl(u32 msr, u64 v)
+void mce_wrmsrl(u32 msr, u64 v)
 {
 	if (__this_cpu_read(injectm.finished)) {
 		int offset = msr_to_offset(msr);
@@ -427,6 +432,18 @@
 	}
 	wrmsrl(msr, v);
 }
+
+int mce_rdmsr_safe(u32 msr, u32 *low, u32 *high) 
+{
+        u64 __val = mce_rdmsrl(msr);
+
+        (*low) = (u32)__val;
+        (*high) = (u32)(__val >> 32);
+
+	return 0;
+}
 
 /*
  * Collect all global (w.r.t. this processor) status about this machine
@@ -1637,6 +1654,7 @@
 		mce_adjust_timer = mce_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
+		amd_inject = 1;
 		mce_amd_feature_init(c);
 		break;
 	default:
diff -uNr amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce-inject.c linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce-inject.c
--- amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce-inject.c	2014-09-18 01:22:16.000000000 +0800
+++ linux-3.16.3/arch/x86/kernel/cpu/mcheck/mce-inject.c	2014-09-30 22:38:30.138557839 +0800
@@ -54,7 +54,10 @@
 
 	memset(&b, 0xff, sizeof(mce_banks_t));
 	local_irq_save(flags);
-	machine_check_poll(0, &b);
+	if (!amd_inject)
+		machine_check_poll(0, &b);
+	else 
+		mce_threshold_vector();
 	local_irq_restore(flags);
 	m->finished = 0;
 }
diff -uNr amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c
--- amd_inject/linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c	2014-09-18 01:22:16.000000000 +0800
+++ linux-3.16.3/arch/x86/kernel/cpu/mcheck/threshold.c	2014-10-01 08:49:06.140738192 +0800
@@ -17,6 +17,7 @@
 }
 
 void (*mce_threshold_vector)(void) = default_threshold_interrupt;
+EXPORT_SYMBOL_GPL(mce_threshold_vector);
 
 static inline void __smp_threshold_interrupt(void)
 {

[-- Attachment #3: amd_inject.c --]
[-- Type: text/x-csrc, Size: 1613 bytes --]

/*
 * Copyright Chen Yucong<slaoub@gmail.com> 2014 
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/amd_nb.h>

#define MASK_OVERFLOW  0x0001000000000000

/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
{
	struct mce *i = &per_cpu(injectm, m->extcpu);

	/* Make sure no one reads partially written injectm */
	i->finished = 0;
	mb();
	m->finished = 0;
	/* First set the fields after finished */
	i->extcpu = m->extcpu;
	mb();
	/* Now write record in order, finished last (except above) */
	memcpy(i, m, sizeof(struct mce));
	/* Finally activate it */
	mb();
	i->finished = 1;
}

static void raise_mce(void)
{
	struct mce m;

	mce_setup(&m);
	m.status = 0X8C00000000000000;
	m.misc = 0XC008000000000000 | MASK_OVERFLOW;
	//m.misc = 0XC008000000000000;
	m.bank = 4;
	m.addr = 0xabcdef;
	inject_mce(&m);

	raise_amd_threshold_event();
}

static int __init amd_inject_init(void)
{
	raise_mce();
	pr_info("amd_inject module loaded ...\n");

	return 0;
}

static void __exit amd_inject_exit(void)
{
	pr_info("amd_inject module unloaded ...\n");
}

module_init(amd_inject_init);
module_exit(amd_inject_exit);

/*
 * Cannot tolerate unloading currently because we cannot
 * guarantee all openers of mce_chrdev will get a reference to us.
 */
MODULE_LICENSE("GPL");

  parent reply	other threads:[~2014-10-01  5:26 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-23  2:16 [PATCH] x86, MCE, AMD: use macros to compute bank MSRs Chen Yucong
2014-09-23  8:19 ` [PATCH] x86, MCE, AMD: save IA32_MCi_STATUS before machine_check_poll() resets it Chen Yucong
2014-09-28  8:15   ` Chen Yucong
2014-09-29 12:05   ` Borislav Petkov
2014-09-30  0:39     ` Chen Yucong
2014-09-30  7:25       ` Borislav Petkov
2014-09-30  9:56         ` Chen Yucong
2014-09-30 10:09           ` Borislav Petkov
2014-10-01  4:35             ` Chen Yucong
2014-10-02 13:12               ` Borislav Petkov
2014-10-02 14:37                 ` Chen Yucong
     [not found]                 ` <CAOjmkp9qQiTbqU3NUhUDAoQAa8wAPJnE_qXbDuBKrA3ee1_APQ@mail.gmail.com>
2014-10-08 21:52                   ` Fwd: " Aravind Gopalakrishnan
2014-10-08 22:57                     ` Borislav Petkov
2014-10-09 16:53                       ` Aravind Gopalakrishnan
2014-10-09 17:35                         ` Borislav Petkov
2014-10-09 19:01                           ` Aravind Gopalakrishnan
2014-10-21 20:28                             ` Borislav Petkov
2014-10-22  1:51                               ` Chen Yucong
2014-10-22  8:16                                 ` Borislav Petkov
2014-10-22  8:53                                   ` Chen Yucong
2014-10-22  9:30                                     ` Borislav Petkov
2014-10-29 15:59                                       ` Aravind Gopalakrishnan
2014-10-30 19:04                                         ` Aravind Gopalakrishnan
2014-10-30 21:39                                           ` Borislav Petkov
2014-10-01  5:26             ` Chen Yucong [this message]
2014-10-01 10:10               ` Borislav Petkov
2014-09-28  8:09 ` [PATCH] x86, MCE, AMD: use macros to compute bank MSRs Chen Yucong
2014-09-29 11:48 ` Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1412141164.21488.39.camel@debian \
    --to=slaoub@gmail.com \
    --cc=bp@alien8.de \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox