linux-acpi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Chen, Gong" <gong.chen@linux.intel.com>
To: tony.luck@intel.com, bp@alien8.de
Cc: linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org, "Chen,
	Gong" <gong.chen@linux.intel.com>
Subject: [PATCH 8/8] ACPI / trace: Add trace interface for eMCA driver
Date: Fri, 11 Oct 2013 02:32:46 -0400	[thread overview]
Message-ID: <1381473166-29303-9-git-send-email-gong.chen@linux.intel.com> (raw)
In-Reply-To: <1381473166-29303-1-git-send-email-gong.chen@linux.intel.com>

Use the trace interface to report all H/W error related
information.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/acpi/Kconfig        |   7 ++-
 drivers/acpi/Makefile       |   4 ++
 drivers/acpi/acpi_extlog.c  |  28 +++++++++++-
 drivers/acpi/apei/cper.c    |  13 ++++--
 drivers/acpi/debug_extlog.h |  16 +++++++
 drivers/acpi/extlog_trace.c | 105 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/acpi/extlog_trace.h |  77 ++++++++++++++++++++++++++++++++
 include/linux/cper.h        |   2 +
 8 files changed, 246 insertions(+), 6 deletions(-)
 create mode 100644 drivers/acpi/debug_extlog.h
 create mode 100644 drivers/acpi/extlog_trace.c
 create mode 100644 drivers/acpi/extlog_trace.h

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 1465fa8..9ea343e 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -372,12 +372,17 @@ config ACPI_BGRT
 
 source "drivers/acpi/apei/Kconfig"
 
+config EXTLOG_TRACE
+	def_bool n
+
 config ACPI_EXTLOG
 	tristate "Extended Error Log support"
 	depends on X86 && X86_MCE
+	select EXTLOG_TRACE
 	default n
 	help
 	  This driver adds support for decoding extended errors from hardware.
-	  which allows the operating system to obtain data from trace.
+	  which allows the operating system to obtain data from trace. It will
+	  appear under /sys/kernel/debug/tracing/events/extlog/.
 
 endif	# ACPI
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index bce34af..a6e41b7 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -83,4 +83,8 @@ obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
 
 obj-$(CONFIG_ACPI_APEI)		+= apei/
 
+# extended log support
+acpi-$(CONFIG_EXTLOG_TRACE)	+= extlog_trace.o
+CFLAGS_extlog_trace.o := -I$(src)
+
 obj-$(CONFIG_ACPI_EXTLOG)	+= acpi_extlog.o
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index 3e3e286..ca51eb0 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -26,6 +26,7 @@
 #include <asm/mce.h>
 
 #include "apei/apei-internal.h"
+#include "debug_extlog.h"
 
 #define EXT_ELOG_ENTRY_MASK	0xfffffffffffff /* elog entry address mask */
 
@@ -55,6 +56,8 @@ struct extlog_l1_head {
 
 static u8 extlog_dsm_uuid[] = "663E35AF-CC10-41A4-88EA-5470AF055295";
 
+static const uuid_le invalid_uuid = NULL_UUID_LE;
+
 /* L1 table related physical address */
 static u64 elog_base;
 static size_t elog_size;
@@ -143,7 +146,12 @@ static int print_extlog_rcd(const char *pfx,
 
 static int extlog_print(const char *pfx, int cpu, int bank)
 {
-	struct acpi_generic_status *estatus;
+	struct acpi_generic_status *estatus, *tmp;
+	struct acpi_generic_data *gdata;
+	const uuid_le *fru_id = &invalid_uuid;
+	char *fru_text = "";
+	uuid_le *sec_type;
+	static u64 err_count;
 	int rc;
 
 	estatus = extlog_elog_entry_check(cpu, bank);
@@ -154,7 +162,23 @@ static int extlog_print(const char *pfx, int cpu, int bank)
 	/* clear record status to enable BIOS to update it again */
 	estatus->block_status = 0;
 
-	rc = print_extlog_rcd(pfx, (struct acpi_generic_status *)elog_buf, cpu);
+	tmp = (struct acpi_generic_status *)elog_buf;
+	gdata = (struct acpi_generic_data *)(tmp + 1);
+	rc = print_extlog_rcd(pfx, tmp, cpu);
+
+	/* trace extended error log */
+	err_count++;
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+		fru_id = (uuid_le *)gdata->fru_id;
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+		fru_text = gdata->fru_text;
+	sec_type = (uuid_le *)gdata->section_type;
+	if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
+		if (gdata->error_data_length >= sizeof(*mem_err))
+			trace_mem_error(fru_id, fru_text, err_count,
+					gdata->error_severity, mem_err);
+	}
 
 	return rc;
 }
diff --git a/drivers/acpi/apei/cper.c b/drivers/acpi/apei/cper.c
index 567410e..0b4cfad 100644
--- a/drivers/acpi/apei/cper.c
+++ b/drivers/acpi/apei/cper.c
@@ -56,11 +56,12 @@ static const char *cper_severity_strs[] = {
 	"info",
 };
 
-static const char *cper_severity_str(unsigned int severity)
+const char *cper_severity_str(unsigned int severity)
 {
 	return severity < ARRAY_SIZE(cper_severity_strs) ?
 		cper_severity_strs[severity] : "unknown";
 }
+EXPORT_SYMBOL_GPL(cper_severity_str);
 
 /*
  * cper_print_bits - print strings for set bits
@@ -195,6 +196,13 @@ static const char *cper_mem_err_type_strs[] = {
 	"Physical Memory Map-out event",
 };
 
+const char *cper_mem_err_type_str(unsigned int etype)
+{
+	return etype >= ARRAY_SIZE(cper_mem_err_type_strs) ?
+		"unknown" : cper_mem_err_type_strs[etype]; /* name from table */
+}
+EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
+
 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 {
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
@@ -232,8 +240,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
 		u8 etype = mem->error_type;
 		printk("%s""error_type: %d, %s\n", pfx, etype,
-		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
-		       cper_mem_err_type_strs[etype] : "unknown");
+			cper_mem_err_type_str(etype));
 	}
 	if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
 		const char *bank = NULL, *device = NULL;
diff --git a/drivers/acpi/debug_extlog.h b/drivers/acpi/debug_extlog.h
new file mode 100644
index 0000000..67bb2c5
--- /dev/null
+++ b/drivers/acpi/debug_extlog.h
@@ -0,0 +1,16 @@
+#ifndef __DEBUG_EXTLOG_H
+#define __DEBUG_EXTLOG_H
+
+#include <linux/cper.h>
+
+#ifdef CONFIG_EXTLOG_TRACE
+extern void trace_mem_error(const uuid_le *fru_id, char *fru_text,
+		u64 err_count, u32 severity, struct cper_sec_mem_err *mem);
+#else	/* tracing not built: no-op stub, one private copy per includer */
+static inline void trace_mem_error(const uuid_le *fru_id, char *fru_text,
+		u64 err_count, u32 severity, struct cper_sec_mem_err *mem)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/acpi/extlog_trace.c b/drivers/acpi/extlog_trace.c
new file mode 100644
index 0000000..2b2824c
--- /dev/null
+++ b/drivers/acpi/extlog_trace.c
@@ -0,0 +1,105 @@
+#include <linux/export.h>
+#include <linux/dmi.h>
+#include "debug_extlog.h"
+
+#define CREATE_TRACE_POINTS
+#include "extlog_trace.h"
+
+static char mem_location[LOC_LEN];
+static char dimm_location[LOC_LEN];
+
+/*
+ * mem_err_location - describe where in memory an error occurred
+ * @mem: CPER memory error section to decode
+ *
+ * Rebuilds the file-scope mem_location string from the fields of @mem
+ * whose validation bits are set, as a run of " name: value" pairs.
+ *
+ * Every append goes through scnprintf(), which is bounded by the space
+ * still free and returns the number of characters actually stored, so
+ * n stays below LOC_LEN and long output is truncated, never overrun.
+ */
+static void mem_err_location(struct cper_sec_mem_err *mem)
+{
+	char *p = mem_location;
+	u32 n = 0;
+
+	memset(mem_location, 0, LOC_LEN);
+
+	if (mem->validation_bits & CPER_MEM_VALID_NODE)
+		n += scnprintf(p + n, LOC_LEN - n, " node: %d", mem->node);
+
+	if (mem->validation_bits & CPER_MEM_VALID_CARD)
+		n += scnprintf(p + n, LOC_LEN - n, " card: %d", mem->card);
+
+	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
+		n += scnprintf(p + n, LOC_LEN - n, " module: %d", mem->module);
+
+	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
+		n += scnprintf(p + n, LOC_LEN - n, " rank: %d", mem->rank);
+
+	if (mem->validation_bits & CPER_MEM_VALID_BANK)
+		n += scnprintf(p + n, LOC_LEN - n, " bank: %d", mem->bank);
+
+	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
+		n += scnprintf(p + n, LOC_LEN - n, " device: %d", mem->device);
+
+	if (mem->validation_bits & CPER_MEM_VALID_ROW)
+		n += scnprintf(p + n, LOC_LEN - n, " row: %d", mem->row);
+
+	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
+		n += scnprintf(p + n, LOC_LEN - n, " column: %d", mem->column);
+
+	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
+		n += scnprintf(p + n, LOC_LEN - n, " bit_position: %d",
+				mem->bit_pos);
+
+	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
+		n += scnprintf(p + n, LOC_LEN - n, " requestor_id: 0x%016llx",
+				mem->requestor_id);
+
+	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
+		n += scnprintf(p + n, LOC_LEN - n, " responder_id: 0x%016llx",
+				mem->responder_id);
+
+	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
+		n += scnprintf(p + n, LOC_LEN - n, " target_id: 0x%016llx",
+				mem->target_id);
+}
+
+/* Describe the DIMM named by @mem's DMI handle in dimm_location, if valid. */
+static void dimm_err_location(struct cper_sec_mem_err *mem)
+{
+	memset(dimm_location, 0, LOC_LEN);
+	if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
+		const char *bank = NULL, *device = NULL;
+		dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
+		if (bank != NULL && device != NULL)
+			snprintf(dimm_location, LOC_LEN, "%s %s", bank, device);
+		else
+			snprintf(dimm_location, LOC_LEN,
+				"DMI handle: 0x%.4x", mem->mem_dev_handle);
+	}
+}
+
+/* Decode @mem and emit one extlog_mem_event trace record for the error. */
+void trace_mem_error(const uuid_le *fru_id, char *fru_text,
+		u64 err_count, u32 severity, struct cper_sec_mem_err *mem)
+{
+	u64 valid = mem->validation_bits;
+	u64 addr = 0;		/* stays 0 when no address is valid */
+	u32 type = ~0U;		/* out of range: prints as "unknown" */
+
+	if (valid & CPER_MEM_VALID_ERROR_TYPE)
+		type = mem->error_type;
+	if (valid & CPER_MEM_VALID_PHYSICAL_ADDRESS)
+		addr = mem->physical_addr;
+	if (valid & CPER_MEM_VALID_PHYSICAL_ADDRESS_MASK)
+		addr &= mem->physical_addr_mask;
+	mem_err_location(mem);
+	dimm_err_location(mem);
+
+	trace_extlog_mem_event(type, dimm_location, fru_id, fru_text,
+			err_count, severity, addr, mem_location);
+}
+EXPORT_SYMBOL_GPL(trace_mem_error);
diff --git a/drivers/acpi/extlog_trace.h b/drivers/acpi/extlog_trace.h
new file mode 100644
index 0000000..21f0887
--- /dev/null
+++ b/drivers/acpi/extlog_trace.h
@@ -0,0 +1,77 @@
+#if !defined(_TRACE_EXTLOG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_EXTLOG_H
+
+#include <linux/tracepoint.h>
+#include <linux/cper.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM extlog
+
+/*
+ * MCE Extended Error Log Trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event.
+ *
+ */
+
+/* memory trace event */
+
+#define LOC_LEN		512
+#define MSG_LEN		((LOC_LEN) * 2)
+
+TRACE_EVENT(extlog_mem_event,
+	TP_PROTO(u32 etype,
+		char *dimm_loc,		/* "" => no DIMM text is recorded */
+		const uuid_le *fru_id,
+		char *fru_text,		/* at most 20 chars are printed */
+		u64 error_count,
+		u32 severity,
+		u64 phy_addr,		/* 0 => the whole msg is left empty */
+		char *mem_loc),
+
+	TP_ARGS(etype, dimm_loc, fru_id, fru_text, error_count, severity,
+		phy_addr, mem_loc),
+
+	TP_STRUCT__entry(
+		__field(u32, etype)
+		__dynamic_array(char, dimm_info, LOC_LEN) /* "on <dimm_loc>" or "" */
+		__field(u64, error_count)
+		__field(u32, severity)
+		__dynamic_array(char, msg, MSG_LEN) /* FRU, address, mem_loc; or "" */
+	),
+
+	TP_fast_assign(
+		__entry->error_count = error_count;
+		__entry->severity = severity;
+		__entry->etype = etype;
+		if (dimm_loc[0] != '\0')
+			snprintf(__get_dynamic_array(dimm_info), LOC_LEN - 1,
+				"on %s", dimm_loc);
+		else
+			__assign_str(dimm_info, "");
+		if (phy_addr != 0)
+			snprintf(__get_dynamic_array(msg), MSG_LEN - 1,
+				"(FRU: %pUl %.20s physical addr: 0x%016llx%s)",
+				fru_id, fru_text, phy_addr, mem_loc);
+		else
+			__assign_str(msg, "");
+	),
+
+	TP_printk("%llu %s error%s:%s %s%s",
+			__entry->error_count,
+			cper_severity_str(__entry->severity),
+			__entry->error_count > 1 ? "s" : "",	/* plural suffix */
+			cper_mem_err_type_str(__entry->etype),
+			__get_str(dimm_info),
+			__get_str(msg))
+);
+
+#endif /* _TRACE_EXTLOG_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE extlog_trace
+#include <trace/define_trace.h>
diff --git a/include/linux/cper.h b/include/linux/cper.h
index bd01c9a..c00eb55 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -395,6 +395,8 @@ struct cper_sec_pcie {
 #pragma pack()
 
 u64 cper_next_record_id(void);
+const char *cper_severity_str(unsigned int);
+const char *cper_mem_err_type_str(unsigned int);
 void cper_print_bits(const char *prefix, unsigned int bits,
 		     const char *strs[], unsigned int strs_size);
 
-- 
1.8.4.rc3

  parent reply	other threads:[~2013-10-11  6:32 UTC|newest]

Thread overview: 85+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-11  6:32 Extended H/W error log driver Chen, Gong
2013-10-11  6:32 ` [PATCH 1/8] ACPI, APEI, CPER: Fix status check during error printing Chen, Gong
2013-10-11  8:50   ` Borislav Petkov
2013-10-11  6:32 ` [PATCH 2/8] ACPI, CPER: Update cper info Chen, Gong
2013-10-11  9:06   ` Borislav Petkov
2013-10-11 15:47     ` Borislav Petkov
2013-10-16  1:57       ` Joe Perches
2013-10-16  2:46         ` Chen Gong
2013-10-16  3:10           ` Joe Perches
2013-10-15 18:17   ` Naveen N. Rao
2013-10-16  1:39     ` Chen Gong
2013-10-17 12:21       ` Naveen N. Rao
2013-10-18 11:06         ` Naveen N. Rao
2013-10-11  6:32 ` [PATCH 3/8] ACPI, x86: Extended error log driver for x86 platform Chen, Gong
2013-10-11 15:24   ` Borislav Petkov
2013-10-14  3:16     ` Chen Gong
2013-10-14 10:26       ` Borislav Petkov
2013-10-14 13:03         ` Chen Gong
2013-10-14 13:28           ` Borislav Petkov
2013-10-14 16:50         ` Tony Luck
2013-10-14 17:07           ` Borislav Petkov
2013-10-14 17:16             ` Tony Luck
2013-10-11  6:32 ` [PATCH 4/8] DMI: Parse memory device (type 17) in SMBIOS Chen, Gong
2013-10-11 15:40   ` Borislav Petkov
2013-10-14  3:21     ` Chen Gong
2013-10-14 10:30       ` Borislav Petkov
2013-10-15 19:00   ` Naveen N. Rao
2013-10-11  6:32 ` [PATCH 5/8] ACPI, APEI, CPER: Add UEFI 2.4 support for memory error Chen, Gong
2013-10-11 15:41   ` Borislav Petkov
2013-10-15 17:26   ` Naveen N. Rao
2013-10-16  1:35     ` Chen Gong
2013-10-11  6:32 ` [PATCH 6/8] ACPI, APEI, CPER: Enhance memory reporting capability Chen, Gong
2013-10-11 15:49   ` Borislav Petkov
2013-10-15 19:18   ` Naveen N. Rao
2013-10-11  6:32 ` [PATCH 7/8] ACPI, APEI, CPER: Cleanup CPER memory error output format Chen, Gong
2013-10-11 16:02   ` Borislav Petkov
2013-10-14  4:55     ` Chen Gong
2013-10-14 10:36       ` Borislav Petkov
2013-10-14 17:12         ` Tony Luck
2013-10-14 18:47           ` Borislav Petkov
2013-10-14 21:03             ` Tony Luck
2013-10-14 21:50               ` Borislav Petkov
2013-10-15  9:18                 ` Chen Gong
2013-10-15 10:13                   ` Borislav Petkov
2013-10-15 11:28           ` Naveen N. Rao
2013-10-15 11:41           ` Naveen N. Rao
2013-10-15 12:29             ` Borislav Petkov
2013-10-15 16:42               ` Joe Perches
2013-10-15 16:49                 ` Tony Luck
2013-10-15 16:56                   ` Borislav Petkov
2013-10-11  6:32 ` Chen, Gong [this message]
2013-10-11  7:52   ` [PATCH 8/8] ACPI / trace: Add trace interface for eMCA driver Borislav Petkov
2013-10-11 16:14   ` Borislav Petkov
2013-10-14  7:07     ` Chen Gong
2013-10-15 16:54   ` Naveen N. Rao
2013-10-15 17:00     ` Borislav Petkov
2013-10-15 17:30       ` Naveen N. Rao
2013-10-15 17:47         ` Borislav Petkov
2013-10-16  0:43         ` Mauro Carvalho Chehab
2013-10-16  9:16           ` Borislav Petkov
2013-10-16 10:35             ` Mauro Carvalho Chehab
2013-10-16 10:42               ` Borislav Petkov
2013-10-16 11:55                 ` Mauro Carvalho Chehab
2013-10-16 12:20                   ` Borislav Petkov
2013-10-16 20:47                   ` Luck, Tony
2013-10-17 10:34                     ` Mauro Carvalho Chehab
2013-10-17 21:35                       ` Luck, Tony
2013-10-16 20:35               ` Luck, Tony
2013-10-17 10:32                 ` Mauro Carvalho Chehab
2013-10-16  9:50     ` Chen Gong
2013-10-16 10:49       ` Borislav Petkov
2013-10-18 11:04         ` Naveen N. Rao
2013-10-11  7:00 ` Extended H/W error log driver Joe Perches
2013-10-11  8:04 ` Borislav Petkov
2013-10-11 14:54   ` Luck, Tony
2013-10-11 15:27     ` Borislav Petkov
2013-10-14  6:49   ` Chen Gong
2013-10-14 10:55     ` Borislav Petkov
2013-10-15  4:07       ` Chen Gong
2013-10-15  9:28         ` Borislav Petkov
2013-10-15 16:15           ` Tony Luck
2013-10-15 19:10             ` Naveen N. Rao
2013-10-15 19:23               ` Borislav Petkov
2013-10-17 12:07                 ` Naveen N. Rao
2013-10-17 13:04                   ` Borislav Petkov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1381473166-29303-9-git-send-email-gong.chen@linux.intel.com \
    --to=gong.chen@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).