From: "Chen, Gong" <gong.chen@linux.intel.com>
To: tony.luck@intel.com, bp@alien8.de, m.chehab@samsung.com
Cc: rostedt@goodmis.org, linux-acpi@vger.kernel.org,
arozansk@redhat.com, "Chen, Gong" <gong.chen@linux.intel.com>
Subject: [PATCH 3/5] trace, RAS: Add eMCA trace event interface
Date: Fri, 28 Mar 2014 01:52:59 -0400 [thread overview]
Message-ID: <1395985981-20476-4-git-send-email-gong.chen@linux.intel.com> (raw)
In-Reply-To: <1395985981-20476-1-git-send-email-gong.chen@linux.intel.com>
Add a trace interface that reports all hardware-error-related information.
Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
drivers/acpi/Kconfig | 3 ++-
drivers/acpi/Makefile | 1 +
drivers/acpi/acpi_extlog.c | 56 ++++++++++++++++++++++++++++++++++++++++---
drivers/ras/Kconfig | 2 +-
drivers/ras/ras-traces.c | 1 +
include/ras/ras_event.h | 60 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 118 insertions(+), 5 deletions(-)
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 4770de5..3e569d4 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -363,6 +363,7 @@ config ACPI_EXTLOG
Enhanced MCA Logging allows firmware to provide additional error
information to system software, synchronous with MCE or CMCI. This
- driver adds support for that functionality.
+ driver adds support for that functionality with corresponding
+ tracepoint which carries that information to userspace.
endif # ACPI
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 0331f91..f6abc4a 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -82,4 +82,5 @@ obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
obj-$(CONFIG_ACPI_APEI) += apei/
+CFLAGS_acpi_extlog.o := -I$(src)
obj-$(CONFIG_ACPI_EXTLOG) += acpi_extlog.o
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index c4a5d87..0ee2c38 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -16,6 +16,7 @@
#include <asm/mce.h>
#include "apei/apei-internal.h"
+#include <ras/ras_event.h>
#define EXT_ELOG_ENTRY_MASK GENMASK_ULL(51, 0) /* elog entry address mask */
@@ -44,6 +45,7 @@ struct extlog_l1_head {
static int old_edac_report_status;
static u8 extlog_dsm_uuid[] __initdata = "663E35AF-CC10-41A4-88EA-5470AF055295";
+static const uuid_le invalid_uuid = NULL_UUID_LE;
/* L1 table related physical address */
static u64 elog_base;
@@ -69,6 +71,34 @@ static u32 l1_percpu_entry;
#define ELOG_ENTRY_ADDR(phyaddr) \
(phyaddr - elog_base + (u8 *)elog_addr)
+static void __trace_mem_error(const uuid_le *fru_id, char *fru_text,
+ u64 err_count, u32 severity,
+ struct cper_sec_mem_err *mem)
+{
+ u32 etype = ~0U;
+ u64 phy_addr = ~0ull;
+ unsigned long flags;
+ char *mem_location;
+ char *dimm_location;
+
+ if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
+ etype = mem->error_type;
+
+ if (mem->validation_bits & CPER_MEM_VALID_PA) {
+ phy_addr = mem->physical_addr;
+ if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
+ phy_addr &= mem->physical_addr_mask;
+ }
+
+ raw_spin_lock_irqsave(&cper_loc_lock, flags);
+ mem_location = cper_mem_err_location(mem);
+ dimm_location = cper_dimm_err_location(mem);
+
+ trace_extlog_mem_event(etype, dimm_location, fru_id, fru_text,
+ err_count, severity, phy_addr, mem_location);
+ raw_spin_unlock_irqrestore(&cper_loc_lock, flags);
+}
+
static struct acpi_generic_status *extlog_elog_entry_check(int cpu, int bank)
{
int idx;
@@ -137,8 +167,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
struct mce *mce = (struct mce *)data;
int bank = mce->bank;
int cpu = mce->extcpu;
- struct acpi_generic_status *estatus;
- int rc;
+ struct acpi_generic_status *estatus, *tmp;
+ struct acpi_generic_data *gdata;
+ const uuid_le *fru_id = &invalid_uuid;
+ char *fru_text = "";
+ uuid_le *sec_type;
+ static u64 err_count;
estatus = extlog_elog_entry_check(cpu, bank);
if (estatus == NULL)
@@ -148,7 +182,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
/* clear record status to enable BIOS to update it again */
estatus->block_status = 0;
- rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
+ tmp = (struct acpi_generic_status *)elog_buf;
+ print_extlog_rcd(NULL, tmp, cpu);
+
+ /* log event via trace */
+ err_count++;
+ gdata = (struct acpi_generic_data *)(tmp + 1);
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+ fru_id = (uuid_le *)gdata->fru_id;
+ if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+ fru_text = gdata->fru_text;
+ sec_type = (uuid_le *)gdata->section_type;
+ if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+ struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
+ if (gdata->error_data_length >= sizeof(*mem_err))
+ __trace_mem_error(fru_id, fru_text, err_count,
+ gdata->error_severity, mem_err);
+ }
return NOTIFY_STOP;
}
diff --git a/drivers/ras/Kconfig b/drivers/ras/Kconfig
index 6e4aec5..64f09641 100644
--- a/drivers/ras/Kconfig
+++ b/drivers/ras/Kconfig
@@ -1,4 +1,4 @@
# RAS_TRACE always gets selected by whoever wants it.
config RAS_TRACE
def_bool y
- depends on EDAC_MM_EDAC
+ depends on EDAC_MM_EDAC || ACPI_EXTLOG
diff --git a/drivers/ras/ras-traces.c b/drivers/ras/ras-traces.c
index b0c6ed1..197b1ea 100644
--- a/drivers/ras/ras-traces.c
+++ b/drivers/ras/ras-traces.c
@@ -9,4 +9,5 @@
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>
+EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 21cdb0b..dfda854 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -8,6 +8,66 @@
#include <linux/tracepoint.h>
#include <linux/edac.h>
#include <linux/ktime.h>
+#include <linux/cper.h>
+
+/*
+ * MCE Extended Error Log trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event.
+ *
+ */
+
+/* memory trace event */
+
+TRACE_EVENT(extlog_mem_event,
+ TP_PROTO(u32 etype,
+ char *dimm_info,
+ const uuid_le *fru_id,
+ char *fru_text,
+ u64 error_count,
+ u32 severity,
+ u64 phy_addr,
+ char *mem_loc),
+
+ TP_ARGS(etype, dimm_info, fru_id, fru_text, error_count, severity,
+ phy_addr, mem_loc),
+
+ TP_STRUCT__entry(
+ __field(u32, etype)
+ __dynamic_array(char, dimm_info, CPER_REC_LEN)
+ __field(u64, error_count)
+ __field(u32, severity)
+ __field(u64, paddr)
+ __string(mem_loc, mem_loc)
+ __dynamic_array(char, fru, CPER_REC_LEN)
+ ),
+
+ TP_fast_assign(
+ __entry->error_count = error_count;
+ __entry->severity = severity;
+ __entry->etype = etype;
+ if (dimm_info[0] != '\0')
+ snprintf(__get_dynamic_array(dimm_info),
+ CPER_REC_LEN - 1, "%s", dimm_info);
+ else
+ __assign_str(dimm_info, "");
+ __entry->paddr = phy_addr;
+ __assign_str(mem_loc, mem_loc);
+ snprintf(__get_dynamic_array(fru), CPER_REC_LEN - 1,
+ "FRU: %pUl %.20s", fru_id, fru_text);
+ ),
+
+ TP_printk("%llu %s error%s: %s %s physical addr: 0x%016llx%s %s",
+ __entry->error_count,
+ cper_severity_str(__entry->severity),
+ __entry->error_count > 1 ? "s" : "",
+ cper_mem_err_type_str(__entry->etype),
+ __get_str(dimm_info),
+ __entry->paddr,
+ __get_str(mem_loc),
+ __get_str(fru))
+);
/*
* Hardware Events Report
--
1.9.0
next prev parent reply other threads:[~2014-03-28 6:16 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-03-28 5:52 Add new eMCA trace event interface Chen, Gong
2014-03-28 5:52 ` [PATCH 1/5] trace, RAS: Add basic RAS trace event Chen, Gong
2014-04-09 19:46 ` Borislav Petkov
2014-04-14 3:20 ` Chen, Gong
2014-04-14 10:46 ` Borislav Petkov
2014-04-16 6:33 ` Chen, Gong
2014-04-16 13:10 ` Borislav Petkov
2014-03-28 5:52 ` [PATCH 2/5] CPER: Adjust code flow of some functions Chen, Gong
2014-04-14 13:39 ` Borislav Petkov
2014-04-14 14:05 ` Borislav Petkov
2014-04-15 9:24 ` Chen, Gong
2014-04-15 18:02 ` Borislav Petkov
2014-04-16 5:01 ` Chen, Gong
2014-04-16 13:14 ` Borislav Petkov
2014-04-15 9:19 ` Chen, Gong
2014-04-15 18:05 ` Borislav Petkov
2014-04-16 6:23 ` Chen, Gong
2014-04-16 13:28 ` Borislav Petkov
2014-04-17 3:00 ` Chen, Gong
2014-03-28 5:52 ` Chen, Gong [this message]
2014-03-28 5:53 ` [PATCH 4/5] trace, eMCA: Add a knob to adjust where to save event log Chen, Gong
2014-04-03 23:46 ` Tony Luck
2014-04-04 8:05 ` Chen, Gong
2014-04-08 7:59 ` [PATCH 4/5 v2] " Chen, Gong
2014-03-28 5:53 ` [PATCH 5/5] trace, AER: Move trace into unified interface Chen, Gong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1395985981-20476-4-git-send-email-gong.chen@linux.intel.com \
--to=gong.chen@linux.intel.com \
--cc=arozansk@redhat.com \
--cc=bp@alien8.de \
--cc=linux-acpi@vger.kernel.org \
--cc=m.chehab@samsung.com \
--cc=rostedt@goodmis.org \
--cc=tony.luck@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).