trace, RAS: New eMCA trace event interface

public inbox for linux-acpi@vger.kernel.org
 help / color / mirror / Atom feed

* trace, RAS: New eMCA trace event interface
@ 2014-03-04  9:23 Chen, Gong
  2014-03-04  9:23 ` [PATCH 1/2] trace, RAS: Add basic RAS trace event Chen, Gong
                   ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Chen, Gong @ 2014-03-04  9:23 UTC (permalink / raw)
  To: tony.luck, bp, m.chehab; +Cc: arozansk, linux-acpi

[PATCH 1/2] trace, RAS: Add basic RAS trace event
[PATCH 2/2] trace, RAS: Add eMCA trace event interface

This patch series adds new eMCA trace event interface.
To avoid conflict with existed interface, I add one
new unified trace event stub in the kernel.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* [PATCH 1/2] trace, RAS: Add basic RAS trace event
  2014-03-04  9:23 trace, RAS: New eMCA trace event interface Chen, Gong
@ 2014-03-04  9:23 ` Chen, Gong
  2014-03-06 11:18   ` Borislav Petkov
  2014-03-04  9:23 ` [PATCH 2/2] trace, RAS: Add eMCA trace event interface Chen, Gong
  2014-03-04 17:54 ` trace, RAS: New " Luck, Tony
  2 siblings, 1 reply; 26+ messages in thread
From: Chen, Gong @ 2014-03-04  9:23 UTC (permalink / raw)
  To: tony.luck, bp, m.chehab; +Cc: arozansk, linux-acpi, Chen, Gong

To avoid the confuision of usage for RAS related trace event, add
an unified RAS trace event stub.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/edac/edac_mc.c    |  3 ---
 kernel/trace/Makefile     |  1 +
 kernel/trace/ras-traces.c | 12 ++++++++++++
 3 files changed, 13 insertions(+), 3 deletions(-)
 create mode 100644 kernel/trace/ras-traces.c

diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 33edd67..28c1695 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -33,9 +33,6 @@
 #include <asm/edac.h>
 #include "edac_core.h"
 #include "edac_module.h"
-
-#define CREATE_TRACE_POINTS
-#define TRACE_INCLUDE_PATH ../../include/ras
 #include <ras/ras_event.h>
 
 /* lock to memory controller's control array */
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 1378e84..167193a 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -52,6 +52,7 @@ endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
+obj-$(CONFIG_TRACEPOINTS) += ras-traces.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_PM_RUNTIME),y)
 obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
diff --git a/kernel/trace/ras-traces.c b/kernel/trace/ras-traces.c
new file mode 100644
index 0000000..b0c6ed1
--- /dev/null
+++ b/kernel/trace/ras-traces.c
@@ -0,0 +1,12 @@
+/*
+ * Copyright (C) 2014 Intel Corporation
+ *
+ * Authors:
+ *	Chen, Gong <gong.chen@linux.intel.com>
+ */
+
+#define CREATE_TRACE_POINTS
+#define TRACE_INCLUDE_PATH ../../include/ras
+#include <ras/ras_event.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
-- 
1.8.4.3


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-04  9:23 trace, RAS: New eMCA trace event interface Chen, Gong
  2014-03-04  9:23 ` [PATCH 1/2] trace, RAS: Add basic RAS trace event Chen, Gong
@ 2014-03-04  9:23 ` Chen, Gong
  2014-03-07 11:44   ` Borislav Petkov
  2014-03-04 17:54 ` trace, RAS: New " Luck, Tony
  2 siblings, 1 reply; 26+ messages in thread
From: Chen, Gong @ 2014-03-04  9:23 UTC (permalink / raw)
  To: tony.luck, bp, m.chehab; +Cc: arozansk, linux-acpi, Chen, Gong

Add trace interface to elaborate all H/W error related information.

Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
---
 drivers/acpi/Kconfig        |   3 +-
 drivers/acpi/Makefile       |   1 +
 drivers/acpi/acpi_extlog.c  | 131 +++++++++++++++++++++++++++++++++++++++++++-
 drivers/firmware/efi/cper.c |  13 ++++-
 include/linux/cper.h        |   2 +
 include/ras/ras_event.h     |  62 +++++++++++++++++++++
 kernel/trace/ras-traces.c   |   1 +
 7 files changed, 208 insertions(+), 5 deletions(-)

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 4770de5..3e569d4 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -363,6 +363,7 @@ config ACPI_EXTLOG
 
 	  Enhanced MCA Logging allows firmware to provide additional error
 	  information to system software, synchronous with MCE or CMCI. This
-	  driver adds support for that functionality.
+	  driver adds support for that functionality with corresponding
+	  tracepoint which carries that information to userspace.
 
 endif	# ACPI
diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
index 0331f91..f6abc4a 100644
--- a/drivers/acpi/Makefile
+++ b/drivers/acpi/Makefile
@@ -82,4 +82,5 @@ obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
 
 obj-$(CONFIG_ACPI_APEI)		+= apei/
 
+CFLAGS_acpi_extlog.o := -I$(src)
 obj-$(CONFIG_ACPI_EXTLOG)	+= acpi_extlog.o
diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
index c4a5d87..fbdebad 100644
--- a/drivers/acpi/acpi_extlog.c
+++ b/drivers/acpi/acpi_extlog.c
@@ -14,8 +14,10 @@
 #include <linux/edac.h>
 #include <asm/cpu.h>
 #include <asm/mce.h>
+#include <linux/dmi.h>
 
 #include "apei/apei-internal.h"
+#include <ras/ras_event.h>
 
 #define EXT_ELOG_ENTRY_MASK	GENMASK_ULL(51, 0) /* elog entry address mask */
 
@@ -44,6 +46,11 @@ struct extlog_l1_head {
 static int old_edac_report_status;
 
 static u8 extlog_dsm_uuid[] __initdata = "663E35AF-CC10-41A4-88EA-5470AF055295";
+static const uuid_le invalid_uuid = NULL_UUID_LE;
+
+static DEFINE_RAW_SPINLOCK(trace_lock);
+static char mem_location[LOC_LEN];
+static char dimm_location[LOC_LEN];
 
 /* L1 table related physical address */
 static u64 elog_base;
@@ -69,6 +76,106 @@ static u32 l1_percpu_entry;
 #define ELOG_ENTRY_ADDR(phyaddr) \
 	(phyaddr - elog_base + (u8 *)elog_addr)
 
+static void mem_err_location(struct cper_sec_mem_err *mem)
+{
+	char *p;
+	u32 n = 0;
+
+	memset(mem_location, 0, LOC_LEN);
+	p = mem_location;
+	if (mem->validation_bits & CPER_MEM_VALID_NODE)
+		n += sprintf(p + n, " node: %d", mem->node);
+	if (n >= LOC_LEN)
+		goto end;
+	if (mem->validation_bits & CPER_MEM_VALID_CARD)
+		n += sprintf(p + n, " card: %d", mem->card);
+	if (n >= LOC_LEN)
+		goto end;
+	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
+		n += sprintf(p + n, " module: %d", mem->module);
+	if (n >= LOC_LEN)
+		goto end;
+	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
+		n += sprintf(p + n, " rank: %d", mem->rank);
+	if (n >= LOC_LEN)
+		goto end;
+	if (mem->validation_bits & CPER_MEM_VALID_BANK)
+		n += sprintf(p + n, " bank: %d", mem->bank);
+	if (n >= LOC_LEN)
+		goto end;
+	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
+		n += sprintf(p + n, " device: %d", mem->device);
+	if (n >= LOC_LEN)
+		goto end;
+	if (mem->validation_bits & CPER_MEM_VALID_ROW)
+		n += sprintf(p + n, " row: %d", mem->row);
+	if (n >= LOC_LEN)
+		goto end;
+	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
+		n += sprintf(p + n, " column: %d", mem->column);
+	if (n >= LOC_LEN)
+		goto end;
+	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
+		n += sprintf(p + n, " bit_position: %d", mem->bit_pos);
+	if (n >= LOC_LEN)
+		goto end;
+	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
+		n += sprintf(p + n, " requestor_id: 0x%016llx",
+				mem->requestor_id);
+	if (n >= LOC_LEN)
+		goto end;
+	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
+		n += sprintf(p + n, " responder_id: 0x%016llx",
+				mem->responder_id);
+	if (n >= LOC_LEN)
+		goto end;
+	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
+		n += sprintf(p + n, " target_id: 0x%016llx", mem->target_id);
+end:
+	return;
+}
+
+static void dimm_err_location(struct cper_sec_mem_err *mem)
+{
+	const char *bank = NULL, *device = NULL;
+
+	memset(dimm_location, 0, LOC_LEN);
+	if (!(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
+		return;
+
+	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
+	if (bank != NULL && device != NULL)
+		snprintf(dimm_location, LOC_LEN - 1, "%s %s", bank, device);
+	else
+		snprintf(dimm_location, LOC_LEN - 1, "DMI handle: 0x%.4x",
+			 mem->mem_dev_handle);
+}
+
+static void trace_mem_error(const uuid_le *fru_id, char *fru_text,
+			    u64 err_count, u32 severity,
+			    struct cper_sec_mem_err *mem)
+{
+	u32 etype = ~0U;
+	u64 phy_addr = ~0ull;
+	unsigned long flags;
+
+	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
+		etype = mem->error_type;
+	if (mem->validation_bits & CPER_MEM_VALID_PA) {
+		phy_addr = mem->physical_addr;
+		if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
+			phy_addr &= mem->physical_addr_mask;
+	}
+
+	raw_spin_lock_irqsave(&trace_lock, flags);
+	mem_err_location(mem);
+	dimm_err_location(mem);
+
+	trace_extlog_mem_event(etype, dimm_location, fru_id, fru_text,
+			       err_count, severity, phy_addr, mem_location);
+	raw_spin_unlock_irqrestore(&trace_lock, flags);
+}
+
 static struct acpi_generic_status *extlog_elog_entry_check(int cpu, int bank)
 {
 	int idx;
@@ -137,7 +244,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	struct mce *mce = (struct mce *)data;
 	int	bank = mce->bank;
 	int	cpu = mce->extcpu;
-	struct acpi_generic_status *estatus;
+	struct acpi_generic_status *estatus, *tmp;
+	struct acpi_generic_data *gdata;
+	const uuid_le *fru_id = &invalid_uuid;
+	char *fru_text = "";
+	uuid_le *sec_type;
+	static u64 err_count;
 	int rc;
 
 	estatus = extlog_elog_entry_check(cpu, bank);
@@ -149,6 +261,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
 	estatus->block_status = 0;
 
 	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
+	tmp = (struct acpi_generic_status *)elog_buf;
+	gdata = (struct acpi_generic_data *)(tmp + 1);
+	rc = print_extlog_rcd(NULL, tmp, cpu);
+
+	/* trace extended error log */
+	err_count++;
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
+		fru_id = (uuid_le *)gdata->fru_id;
+	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
+		fru_text = gdata->fru_text;
+	sec_type = (uuid_le *)gdata->section_type;
+	if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
+		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
+		if (gdata->error_data_length >= sizeof(*mem_err))
+			trace_mem_error(fru_id, fru_text, err_count,
+					gdata->error_severity, mem_err);
+	}
 
 	return NOTIFY_STOP;
 }
diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
index 1491dd4..9d3e2c4 100644
--- a/drivers/firmware/efi/cper.c
+++ b/drivers/firmware/efi/cper.c
@@ -57,11 +57,12 @@ static const char *cper_severity_strs[] = {
 	"info",
 };
 
-static const char *cper_severity_str(unsigned int severity)
+const char *cper_severity_str(unsigned int severity)
 {
 	return severity < ARRAY_SIZE(cper_severity_strs) ?
 		cper_severity_strs[severity] : "unknown";
 }
+EXPORT_SYMBOL_GPL(cper_severity_str);
 
 /*
  * cper_print_bits - print strings for set bits
@@ -196,6 +197,13 @@ static const char *cper_mem_err_type_strs[] = {
 	"physical memory map-out event",
 };
 
+const char *cper_mem_err_type_str(unsigned int etype)
+{
+	return etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
+		cper_mem_err_type_strs[etype] : "unknown";
+}
+EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
+
 static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 {
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
@@ -233,8 +241,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
 	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
 		u8 etype = mem->error_type;
 		printk("%s""error_type: %d, %s\n", pfx, etype,
-		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
-		       cper_mem_err_type_strs[etype] : "unknown");
+			cper_mem_err_type_str(etype));
 	}
 	if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
 		const char *bank = NULL, *device = NULL;
diff --git a/include/linux/cper.h b/include/linux/cper.h
index 2fc0ec3..c6d87fc 100644
--- a/include/linux/cper.h
+++ b/include/linux/cper.h
@@ -395,6 +395,8 @@ struct cper_sec_pcie {
 #pragma pack()
 
 u64 cper_next_record_id(void);
+const char *cper_severity_str(unsigned int);
+const char *cper_mem_err_type_str(unsigned int);
 void cper_print_bits(const char *prefix, unsigned int bits,
 		     const char * const strs[], unsigned int strs_size);
 
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 21cdb0b..97f2192 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -8,6 +8,68 @@
 #include <linux/tracepoint.h>
 #include <linux/edac.h>
 #include <linux/ktime.h>
+#include <linux/cper.h>
+
+/*
+ * MCE Extended Error Log trace event
+ *
+ * These events are generated when hardware detects a corrected or
+ * uncorrected event.
+ *
+ */
+
+/* memory trace event */
+
+#define LOC_LEN		512
+
+TRACE_EVENT(extlog_mem_event,
+	TP_PROTO(u32 etype,
+		 char *dimm_info,
+		 const uuid_le *fru_id,
+		 char *fru_text,
+		 u64 error_count,
+		 u32 severity,
+		 u64 phy_addr,
+		 char *mem_loc),
+
+	TP_ARGS(etype, dimm_info, fru_id, fru_text, error_count, severity,
+		phy_addr, mem_loc),
+
+	TP_STRUCT__entry(
+		__field(u32, etype)
+		__dynamic_array(char, dimm_info, LOC_LEN)
+		__field(u64, error_count)
+		__field(u32, severity)
+		__field(u64, paddr)
+		__string(mem_loc, mem_loc)
+		__dynamic_array(char, fru, LOC_LEN)
+	),
+
+	TP_fast_assign(
+		__entry->error_count = error_count;
+		__entry->severity = severity;
+		__entry->etype = etype;
+		if (dimm_info[0] != '\0')
+			snprintf(__get_dynamic_array(dimm_info), LOC_LEN - 1,
+				 "%s", dimm_info);
+		else
+			__assign_str(dimm_info, "");
+		__entry->paddr = phy_addr;
+		__assign_str(mem_loc, mem_loc);
+		snprintf(__get_dynamic_array(fru), LOC_LEN - 1,
+			 "FRU: %pUl %.20s", fru_id, fru_text);
+	),
+
+	TP_printk("%llu %s error%s: %s %s physical addr: 0x%016llx%s %s",
+		  __entry->error_count,
+		  cper_severity_str(__entry->severity),
+		  __entry->error_count > 1 ? "s" : "",
+		  cper_mem_err_type_str(__entry->etype),
+		  __get_str(dimm_info),
+		  __entry->paddr,
+		  __get_str(mem_loc),
+		  __get_str(fru))
+);
 
 /*
  * Hardware Events Report
diff --git a/kernel/trace/ras-traces.c b/kernel/trace/ras-traces.c
index b0c6ed1..197b1ea 100644
--- a/kernel/trace/ras-traces.c
+++ b/kernel/trace/ras-traces.c
@@ -9,4 +9,5 @@
 #define TRACE_INCLUDE_PATH ../../include/ras
 #include <ras/ras_event.h>
 
+EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
 EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
-- 
1.8.4.3


^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: trace, RAS: New eMCA trace event interface
  2014-03-04  9:23 trace, RAS: New eMCA trace event interface Chen, Gong
  2014-03-04  9:23 ` [PATCH 1/2] trace, RAS: Add basic RAS trace event Chen, Gong
  2014-03-04  9:23 ` [PATCH 2/2] trace, RAS: Add eMCA trace event interface Chen, Gong
@ 2014-03-04 17:54 ` Luck, Tony
  2014-03-07  9:10   ` Mauro Carvalho Chehab
  2 siblings, 1 reply; 26+ messages in thread
From: Luck, Tony @ 2014-03-04 17:54 UTC (permalink / raw)
  To: Chen, Gong, bp, m.chehab; +Cc: arozansk, linux-acpi

Here is (most of) the matching patch to rasdaemon to collect
messages from this trace.  Rasdaemon lives at:
	git://git.fedorahosted.org/rasdaemon.git

Missing part is saving to sqlite data base - boiler plate is present,
but the specifics to create tables and save to them need to be added.
But this should serve as a useful test bed while we argue the punctuation
in the fields supplied by the kernel trace.

Note: There are few platforms that fully support this at the moment. One
is the "Brickland Ivy Bridge -EX" (Running BIOS 0046.R04 or newer). Another
is the "Grantley Haswell -EP" (Running BIOS 0026 ... which I don't think is
out yet).  Be sure to enable eMCA options in BIOS setup EDKII>Advanced>System Event Log.

-Tony

---


From: "Luck, Tony" <tony.luck@intel.com>

[RASDAEMON] Add support for extlog events reported by the kernel

Kernel may report extra platform specific information provided by
BIOS. Capture it and log it.

TODO: log to the SQLITE data base - just top level hooks present
in this patch.

Signed-off-by: Tony Luck <tony.luck@intel.com>

---

diff --git a/Makefile.am b/Makefile.am
index c1668b4d939d..2f81d777823c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -17,13 +17,17 @@ if WITH_MCE
 			mce-intel-dunnington.c mce-intel-tulsa.c \
 			mce-intel-sb.c mce-intel-ivb.c
 endif
+if WITH_EXTLOG
+   rasdaemon_SOURCES += ras-extlog-handler.c
+endif
 if WITH_ABRT_REPORT
    rasdaemon_SOURCES += ras-report.c
 endif
 rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
 
 include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h \
-		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h
+		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \
+		  ras-extlog-handler.h
 
 # This rule can't be called with more than one Makefile job (like make -j8)
 # I can't figure out a way to fix that
diff --git a/configure.ac b/configure.ac
index 5bd824c0dbe2..b050ad864cdf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -53,6 +53,15 @@ AS_IF([test "x$enable_mce" = "xyes"], [
 ])
 AM_CONDITIONAL([WITH_MCE], [test x$enable_mce = xyes])
 
+AC_ARG_ENABLE([extlog],
+    AS_HELP_STRING([--enable-extlog], [enable EXTLOG events (currently experimental)]))
+
+AS_IF([test "x$enable_extlog" = "xyes"], [
+  AC_DEFINE(HAVE_EXTLOG,1,"have EXTLOG events collect")
+  AC_SUBST([WITH_EXTLOG])
+])
+AM_CONDITIONAL([WITH_EXTLOG], [test x$enable_extlog = xyes])
+
 AC_ARG_ENABLE([abrt_report],
     AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)]))
 
diff --git a/ras-events.c b/ras-events.c
index ecbbd3afa66d..90ea3727f667 100644
--- a/ras-events.c
+++ b/ras-events.c
@@ -30,6 +30,7 @@
 #include "ras-mc-handler.h"
 #include "ras-aer-handler.h"
 #include "ras-mce-handler.h"
+#include "ras-extlog-handler.h"
 #include "ras-record.h"
 #include "ras-logger.h"
 
@@ -203,6 +204,10 @@ int toggle_ras_mc_event(int enable)
 	rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable);
 #endif
 
+#ifdef HAVE_EXTLOG
+	rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable);
+#endif
+
 free_ras:
 	free(ras);
 	return rc;
@@ -688,6 +693,17 @@ int handle_ras_events(int record_events)
 		    "mce", "mce_record");
 	}
 #endif
+
+#ifdef HAVE_EXTLOG
+	rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event",
+			       ras_extlog_mem_event_handler);
+	if (!rc)
+		num_events++;
+	else
+		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
+		    "ras", "aer_event");
+#endif
+
 	if (!num_events) {
 		log(ALL, LOG_INFO,
 		    "Failed to trace all supported RAS events. Aborting.\n");
diff --git a/ras-extlog-handler.c b/ras-extlog-handler.c
new file mode 100644
index 000000000000..1166a5341703
--- /dev/null
+++ b/ras-extlog-handler.c
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2014 Tony Luck <tony.luck@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+#include <ctype.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include "libtrace/kbuffer.h"
+#include "ras-extlog-handler.h"
+#include "ras-record.h"
+#include "ras-logger.h"
+#include "ras-report.h"
+
+static char *err_type(int etype)
+{
+	switch (etype) {
+	case 0: return "unknown";
+	case 1: return "no error";
+	case 2: return "single-bit ECC";
+	case 3: return "multi-bit ECC";
+	case 4: return "single-symbol chipkill ECC";
+	case 5: return "multi-symbol chipkill ECC";
+	case 6: return "master abort";
+	case 7: return "target abort";
+	case 8: return "parity error";
+	case 9: return "watchdog timeout";
+	case 10: return "invalid address";
+	case 11: return "mirror Broken";
+	case 12: return "memory sparing";
+	case 13: return "scrub corrected error";
+	case 14: return "scrub uncorrected error";
+	case 15: return "physical memory map-out event";
+	}
+	return "unknown-type";
+}
+
+static char *err_severity(int severity)
+{
+	switch (severity) {
+	case 0: return "recoverable";
+	case 1: return "fatal";
+	case 2: return "corrected";
+	case 3: return "informational";
+	}
+	return "unknown-severity";
+}
+
+static void report_extlog_mem_event(struct ras_events *ras,
+				    struct pevent_record *record,
+				    struct trace_seq *s,
+				    struct ras_extlog_event *ev)
+{
+	trace_seq_printf(s, "%d %s error: %s %s physical addr: 0x%llx (%s %s)",
+		ev->error_count, err_severity(ev->severity),
+		err_type(ev->etype), ev->dimm_info, ev->address,
+		ev->mem_loc, ev->fru);
+}
+
+int ras_extlog_mem_event_handler(struct trace_seq *s,
+			  struct pevent_record *record,
+			  struct event_format *event, void *context)
+{
+	int len;
+	unsigned long long val;
+	struct ras_events *ras = context;
+	time_t now;
+	struct tm *tm;
+	struct ras_extlog_event ev;
+
+	/*
+	 * Newer kernels (3.10-rc1 or upper) provide an uptime clock.
+	 * On previous kernels, the way to properly generate an event would
+	 * be to inject a fake one, measure its timestamp and diff it against
+	 * gettimeofday. We won't do it here. Instead, let's use uptime,
+	 * falling-back to the event report's time, if "uptime" clock is
+	 * not available (legacy kernels).
+	 */
+
+	if (ras->use_uptime)
+		now = record->ts/1000000000L + ras->uptime_diff;
+	else
+		now = time(NULL);
+
+	tm = localtime(&now);
+	if (tm)
+		strftime(ev.timestamp, sizeof(ev.timestamp),
+			 "%Y-%m-%d %H:%M:%S %z", tm);
+	trace_seq_printf(s, "%s ", ev.timestamp);
+
+	if (pevent_get_field_val(s,  event, "etype", record, &val, 1) < 0)
+		return -1;
+	ev.etype = val;
+	if (pevent_get_field_val(s,  event, "error_count", record, &val, 1) < 0)
+		return -1;
+	ev.error_count = val;
+	if (pevent_get_field_val(s,  event, "severity", record, &val, 1) < 0)
+		return -1;
+	ev.severity = val;
+	if (pevent_get_field_val(s,  event, "paddr", record, &val, 1) < 0)
+		return -1;
+	ev.address = val;
+
+	ev.dimm_info = pevent_get_field_raw(s, event, "dimm_info",
+					   record, &len, 1);
+	ev.mem_loc = pevent_get_field_raw(s, event, "mem_loc",
+					   record, &len, 1);
+	ev.fru = pevent_get_field_raw(s, event, "fru",
+					   record, &len, 1);
+
+	report_extlog_mem_event(ras, record, s, &ev);
+
+#ifdef HAVE_SQLITE3
+	ras_store_extlog_mem_record(ras, &ev);
+#endif
+
+	return 0;
+}
diff --git a/ras-extlog-handler.h b/ras-extlog-handler.h
new file mode 100644
index 000000000000..54e8cec93af9
--- /dev/null
+++ b/ras-extlog-handler.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (C) 2014 Tony Luck <tony.luck@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+*/
+
+#ifndef __RAS_EXTLOG_HANDLER_H
+#define __RAS_EXTLOG_HANDLER_H
+
+#include <stdint.h>
+
+#include "ras-events.h"
+#include "libtrace/event-parse.h"
+
+extern int ras_extlog_mem_event_handler(struct trace_seq *s,
+			  struct pevent_record *record,
+			  struct event_format *event, void *context);
+
+#endif
diff --git a/ras-record.c b/ras-record.c
index daa3cb102883..6b4cf548bb46 100644
--- a/ras-record.c
+++ b/ras-record.c
@@ -157,6 +157,28 @@ int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev)
 }
 #endif
 
+#ifdef HAVE_EXTLOG
+static const struct db_fields extlog_event_fields[] = {
+		{ .name="id",			.type="INTEGER PRIMARY KEY" },
+		{ .name="timestamp",		.type="TEXT" },
+};
+
+static const struct db_table_descriptor extlog_event_tab = {
+	.name = "extlog_event",
+	.fields = extlog_event_fields,
+	.num_fields = ARRAY_SIZE(extlog_event_fields),
+};
+
+int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev)
+{
+
+	/*
+	 * TODO: Define the right fields above (extlog_event_fields[]) and
+	 * add code here to save them to the database
+	 */
+	return 0;
+}
+#endif
 
 /*
  * Table and functions to handle mce:mce_record
@@ -384,6 +406,13 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras)
 					 &aer_event_tab);
 #endif
 
+#ifdef HAVE_EXTLOG
+	rc = ras_mc_create_table(priv, &extlog_event_tab);
+	if (rc == SQLITE_OK)
+		rc = ras_mc_prepare_stmt(priv, &priv->stmt_extlog_record,
+					 &extlog_event_tab);
+#endif
+
 #ifdef HAVE_MCE
 	rc = ras_mc_create_table(priv, &mce_record_tab);
 	if (rc == SQLITE_OK)
diff --git a/ras-record.h b/ras-record.h
index 6f146a875b8e..715bfbbedacc 100644
--- a/ras-record.h
+++ b/ras-record.h
@@ -40,8 +40,20 @@ struct ras_aer_event {
 	const char *msg;
 };
 
+struct ras_extlog_event {
+	char timestamp[64];
+	int etype;
+	int error_count;
+	int severity;
+	unsigned long long address;
+	const char *dimm_info;
+	const char *mem_loc;
+	const char *fru;
+};
+
 struct ras_mc_event;
 struct ras_aer_event;
+struct ras_extlog_event;
 struct mce_event;
 
 #ifdef HAVE_SQLITE3
@@ -57,18 +69,23 @@ struct sqlite3_priv {
 #ifdef HAVE_MCE
 	sqlite3_stmt	*stmt_mce_record;
 #endif
+#ifdef HAVE_EXTLOG
+	sqlite3_stmt	*stmt_extlog_record;
+#endif
 };
 
 int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras);
 int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev);
 int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev);
 int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev);
+int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev);
 
 #else
 static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; };
 static inline int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) { return 0; };
 static inline int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; };
 static inline int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) { return 0; };
+static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) { return 0; };
 
 #endif
 

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] trace, RAS: Add basic RAS trace event
  2014-03-04  9:23 ` [PATCH 1/2] trace, RAS: Add basic RAS trace event Chen, Gong
@ 2014-03-06 11:18   ` Borislav Petkov
  2014-03-06 11:43     ` Mauro Carvalho Chehab
  0 siblings, 1 reply; 26+ messages in thread
From: Borislav Petkov @ 2014-03-06 11:18 UTC (permalink / raw)
  To: Chen, Gong; +Cc: tony.luck, m.chehab, arozansk, linux-acpi

On Tue, Mar 04, 2014 at 04:23:16AM -0500, Chen, Gong wrote:
> To avoid the confuision of usage for RAS related trace event, add
> an unified RAS trace event stub.
> 
> Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
> ---
>  drivers/edac/edac_mc.c    |  3 ---
>  kernel/trace/Makefile     |  1 +
>  kernel/trace/ras-traces.c | 12 ++++++++++++
>  3 files changed, 13 insertions(+), 3 deletions(-)
>  create mode 100644 kernel/trace/ras-traces.c
> 
> diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
> index 33edd67..28c1695 100644
> --- a/drivers/edac/edac_mc.c
> +++ b/drivers/edac/edac_mc.c
> @@ -33,9 +33,6 @@
>  #include <asm/edac.h>
>  #include "edac_core.h"
>  #include "edac_module.h"
> -
> -#define CREATE_TRACE_POINTS
> -#define TRACE_INCLUDE_PATH ../../include/ras
>  #include <ras/ras_event.h>
>  
>  /* lock to memory controller's control array */
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index 1378e84..167193a 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -52,6 +52,7 @@ endif
>  obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
>  obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
>  obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
> +obj-$(CONFIG_TRACEPOINTS) += ras-traces.o

Actually I was thinking of slowly concentrating all the RAS stuff into
arch/x86/ras/.

Just add arch/x86/ras/trace.c instead please.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] trace, RAS: Add basic RAS trace event
  2014-03-06 11:18   ` Borislav Petkov
@ 2014-03-06 11:43     ` Mauro Carvalho Chehab
  2014-03-06 12:17       ` Borislav Petkov
  0 siblings, 1 reply; 26+ messages in thread
From: Mauro Carvalho Chehab @ 2014-03-06 11:43 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Chen, Gong, tony.luck, arozansk, linux-acpi

Em Thu, 06 Mar 2014 12:18:52 +0100
Borislav Petkov <bp@alien8.de> escreveu:

> On Tue, Mar 04, 2014 at 04:23:16AM -0500, Chen, Gong wrote:
> > To avoid the confuision of usage for RAS related trace event, add
> > an unified RAS trace event stub.
> > 
> > Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
> > ---
> >  drivers/edac/edac_mc.c    |  3 ---
> >  kernel/trace/Makefile     |  1 +
> >  kernel/trace/ras-traces.c | 12 ++++++++++++
> >  3 files changed, 13 insertions(+), 3 deletions(-)
> >  create mode 100644 kernel/trace/ras-traces.c
> > 
> > diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
> > index 33edd67..28c1695 100644
> > --- a/drivers/edac/edac_mc.c
> > +++ b/drivers/edac/edac_mc.c
> > @@ -33,9 +33,6 @@
> >  #include <asm/edac.h>
> >  #include "edac_core.h"
> >  #include "edac_module.h"
> > -
> > -#define CREATE_TRACE_POINTS
> > -#define TRACE_INCLUDE_PATH ../../include/ras
> >  #include <ras/ras_event.h>
> >  
> >  /* lock to memory controller's control array */
> > diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> > index 1378e84..167193a 100644
> > --- a/kernel/trace/Makefile
> > +++ b/kernel/trace/Makefile
> > @@ -52,6 +52,7 @@ endif
> >  obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
> >  obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
> >  obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
> > +obj-$(CONFIG_TRACEPOINTS) += ras-traces.o
> 
> Actually I was thinking of slowly concentrating all the RAS stuff into
> arch/x86/ras/.
> 
> Just add arch/x86/ras/trace.c instead please.

I would prefer to keep this out of arch/, because there are some 
parts of the ras infra that aren't x86 specific.

So, it would be better to put those at /drivers/ras, and add a
"depends on X86" for the x86 specifics.

Going further, it also make sense to move the EDAC drivers into it.

Regards,
Mauro

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] trace, RAS: Add basic RAS trace event
  2014-03-06 11:43     ` Mauro Carvalho Chehab
@ 2014-03-06 12:17       ` Borislav Petkov
  2014-03-06 13:06         ` Mauro Carvalho Chehab
  0 siblings, 1 reply; 26+ messages in thread
From: Borislav Petkov @ 2014-03-06 12:17 UTC (permalink / raw)
  To: Mauro Carvalho Chehab; +Cc: Chen, Gong, tony.luck, arozansk, linux-acpi

On Thu, Mar 06, 2014 at 08:43:20AM -0300, Mauro Carvalho Chehab wrote:
> I would prefer to keep this out of arch/, because there are some parts
> of the ras infra that aren't x86 specific.

Those are?

> Going further, it also make sense to move the EDAC drivers into it.

Why?

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] trace, RAS: Add basic RAS trace event
  2014-03-06 12:17       ` Borislav Petkov
@ 2014-03-06 13:06         ` Mauro Carvalho Chehab
  2014-03-06 15:26           ` Borislav Petkov
  0 siblings, 1 reply; 26+ messages in thread
From: Mauro Carvalho Chehab @ 2014-03-06 13:06 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Chen, Gong, tony.luck, arozansk, linux-acpi

Em Thu, 06 Mar 2014 13:17:14 +0100
Borislav Petkov <bp@alien8.de> escreveu:

> On Thu, Mar 06, 2014 at 08:43:20AM -0300, Mauro Carvalho Chehab wrote:
> > I would prefer to keep this out of arch/, because there are some parts
> > of the ras infra that aren't x86 specific.
> 
> Those are?

For example PCIe and memory errors are not x86-specific. Also, as ACPI 
may also be used on ARM, we may also start to have APEI errors there:
	https://lwn.net/Articles/574439/
	https://wiki.linaro.org/LEG/Engineering/Kernel/ACPI

So, better to think on that on a long term.

> > Going further, it also make sense to move the EDAC drivers into it.
> 
> Why?

In order to put all RAS drivers under the same place. We may eventually
have a subdir there for EDAC, and one per RAS report mechanism, in order
to keep it cleaner.

Regards,
Mauro

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] trace, RAS: Add basic RAS trace event
  2014-03-06 13:06         ` Mauro Carvalho Chehab
@ 2014-03-06 15:26           ` Borislav Petkov
  2014-03-06 15:39             ` Mauro Carvalho Chehab
  0 siblings, 1 reply; 26+ messages in thread
From: Borislav Petkov @ 2014-03-06 15:26 UTC (permalink / raw)
  To: Mauro Carvalho Chehab; +Cc: Chen, Gong, tony.luck, arozansk, linux-acpi

On Thu, Mar 06, 2014 at 10:06:53AM -0300, Mauro Carvalho Chehab wrote:
> For example PCIe and memory errors are not x86-specific. Also, as ACPI 
> may also be used on ARM, we may also start to have APEI errors there:
> 	https://lwn.net/Articles/574439/
> 	https://wiki.linaro.org/LEG/Engineering/Kernel/ACPI
> 
> So, better to think on that on a long term.

kernel/ras/ could also be used in that case but I guess drivers/ras/ is
fine too.

> In order to put all RAS drivers under the same place. We may
> eventually have a subdir there for EDAC, and one per RAS report
> mechanism, in order to keep it cleaner.

That doesn't bring any advantages - edac drivers are just fine in
drivers/edac/. And without benefits for a move, it would be a senseless
code churn only.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] trace, RAS: Add basic RAS trace event
  2014-03-06 15:26           ` Borislav Petkov
@ 2014-03-06 15:39             ` Mauro Carvalho Chehab
  2014-03-07  6:21               ` Chen, Gong
  0 siblings, 1 reply; 26+ messages in thread
From: Mauro Carvalho Chehab @ 2014-03-06 15:39 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Chen, Gong, tony.luck, arozansk, linux-acpi

Em Thu, 06 Mar 2014 16:26:33 +0100
Borislav Petkov <bp@alien8.de> escreveu:

> On Thu, Mar 06, 2014 at 10:06:53AM -0300, Mauro Carvalho Chehab wrote:
> > For example PCIe and memory errors are not x86-specific. Also, as ACPI 
> > may also be used on ARM, we may also start to have APEI errors there:
> > 	https://lwn.net/Articles/574439/
> > 	https://wiki.linaro.org/LEG/Engineering/Kernel/ACPI
> > 
> > So, better to think on that on a long term.
> 
> kernel/ras/ could also be used in that case but I guess drivers/ras/ is
> fine too.

Both work for me, although drivers/ras seems more adequate, IMHO,
as I expect that we'll have there both subsystem code and drivers.

> 
> > In order to put all RAS drivers under the same place. We may
> > eventually have a subdir there for EDAC, and one per RAS report
> > mechanism, in order to keep it cleaner.
> 
> That doesn't bring any advantages - edac drivers are just fine in
> drivers/edac/. And without benefits for a move, it would be a senseless
> code churn only.

No, it won't bring any technical advantage. Err... if an EDAC driver
or core would depend on something at /drivers/ras, then we may need
to add some extra early init glue, in order to be sure that the code
at /drivers/ras will be initialized before /drivers/edac, or otherwise
it would fail with both are compiled builtin.

-- 

Cheers,
Mauro

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] trace, RAS: Add basic RAS trace event
  2014-03-06 15:39             ` Mauro Carvalho Chehab
@ 2014-03-07  6:21               ` Chen, Gong
  2014-03-07  9:08                 ` Mauro Carvalho Chehab
  0 siblings, 1 reply; 26+ messages in thread
From: Chen, Gong @ 2014-03-07  6:21 UTC (permalink / raw)
  To: Mauro Carvalho Chehab; +Cc: Borislav Petkov, tony.luck, arozansk, linux-acpi

[-- Attachment #1: Type: text/plain, Size: 1356 bytes --]

On Thu, Mar 06, 2014 at 12:39:15PM -0300, Mauro Carvalho Chehab wrote:
> Date: Thu, 06 Mar 2014 12:39:15 -0300
> From: Mauro Carvalho Chehab <m.chehab@samsung.com>
> To: Borislav Petkov <bp@alien8.de>
> Cc: "Chen, Gong" <gong.chen@linux.intel.com>, tony.luck@intel.com,
>  arozansk@redhat.com, linux-acpi@vger.kernel.org
> Subject: Re: [PATCH 1/2] trace, RAS: Add basic RAS trace event
> X-Mailer: Claws Mail 3.9.3 (GTK+ 2.24.22; x86_64-redhat-linux-gnu)
> 
> Em Thu, 06 Mar 2014 16:26:33 +0100
> Borislav Petkov <bp@alien8.de> escreveu:
> 
> > On Thu, Mar 06, 2014 at 10:06:53AM -0300, Mauro Carvalho Chehab wrote:
> > > For example PCIe and memory errors are not x86-specific. Also, as ACPI 
> > > may also be used on ARM, we may also start to have APEI errors there:
> > > 	https://lwn.net/Articles/574439/
> > > 	https://wiki.linaro.org/LEG/Engineering/Kernel/ACPI
> > > 
> > > So, better to think on that on a long term.
> > 
> > kernel/ras/ could also be used in that case but I guess drivers/ras/ is
> > fine too.
> 
> Both work for me, although drivers/ras seems more adequate, IMHO,
> as I expect that we'll have there both subsystem code and drivers.
> 
OK, I will move this trace stub to drivers/ras in next version.
BTW, any other comments for 2nd patch? I thought Mauro will
have some comments for that patch. :-)

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 1/2] trace, RAS: Add basic RAS trace event
  2014-03-07  6:21               ` Chen, Gong
@ 2014-03-07  9:08                 ` Mauro Carvalho Chehab
  0 siblings, 0 replies; 26+ messages in thread
From: Mauro Carvalho Chehab @ 2014-03-07  9:08 UTC (permalink / raw)
  To: Chen, Gong; +Cc: Borislav Petkov, tony.luck, arozansk, linux-acpi

[-- Attachment #1: Type: text/plain, Size: 2013 bytes --]

Em Fri, 07 Mar 2014 01:21:54 -0500
"Chen, Gong" <gong.chen@linux.intel.com> escreveu:

> On Thu, Mar 06, 2014 at 12:39:15PM -0300, Mauro Carvalho Chehab wrote:
> > Date: Thu, 06 Mar 2014 12:39:15 -0300
> > From: Mauro Carvalho Chehab <m.chehab@samsung.com>
> > To: Borislav Petkov <bp@alien8.de>
> > Cc: "Chen, Gong" <gong.chen@linux.intel.com>, tony.luck@intel.com,
> >  arozansk@redhat.com, linux-acpi@vger.kernel.org
> > Subject: Re: [PATCH 1/2] trace, RAS: Add basic RAS trace event
> > X-Mailer: Claws Mail 3.9.3 (GTK+ 2.24.22; x86_64-redhat-linux-gnu)
> > 
> > Em Thu, 06 Mar 2014 16:26:33 +0100
> > Borislav Petkov <bp@alien8.de> escreveu:
> > 
> > > On Thu, Mar 06, 2014 at 10:06:53AM -0300, Mauro Carvalho Chehab wrote:
> > > > For example PCIe and memory errors are not x86-specific. Also, as ACPI 
> > > > may also be used on ARM, we may also start to have APEI errors there:
> > > > 	https://lwn.net/Articles/574439/
> > > > 	https://wiki.linaro.org/LEG/Engineering/Kernel/ACPI
> > > > 
> > > > So, better to think on that on a long term.
> > > 
> > > kernel/ras/ could also be used in that case but I guess drivers/ras/ is
> > > fine too.
> > 
> > Both work for me, although drivers/ras seems more adequate, IMHO,
> > as I expect that we'll have there both subsystem code and drivers.
> > 
> OK, I will move this trace stub to drivers/ras in next version.

Thanks!

> BTW, any other comments for 2nd patch? I thought Mauro will
> have some comments for that patch. :-)

Well, the comment is the same: I still think that the better would be
to map this via EDAC, in order to allow userspace to associate the DIMM
labels with the DIMMs that would be reported, as the association of
card, module, rank number, valid device into the corresponding DIMM
slot, as marked at the board's silkscreen is not trivial.

If you're not willing to do that, the better would then to add such
association logic inside the rasdaemon. Patches are welcomed.

Regards,
Mauro

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: trace, RAS: New eMCA trace event interface
  2014-03-04 17:54 ` trace, RAS: New " Luck, Tony
@ 2014-03-07  9:10   ` Mauro Carvalho Chehab
  2014-03-10 18:55     ` Tony Luck
  0 siblings, 1 reply; 26+ messages in thread
From: Mauro Carvalho Chehab @ 2014-03-07  9:10 UTC (permalink / raw)
  To: Luck, Tony; +Cc: Chen, Gong, bp, arozansk, linux-acpi

Hi Tony,

Em Tue, 04 Mar 2014 09:54:55 -0800
"Luck, Tony" <tony.luck@intel.com> escreveu:

> Here is (most of) the matching patch to rasdaemon to collect
> messages from this trace.  Rasdaemon lives at:
> 	git://git.fedorahosted.org/rasdaemon.git
> 
> Missing part is saving to sqlite data base - boiler plate is present,
> but the specifics to create tables and save to them need to be added.
> But this should serve as a useful test bed while we argue the punctuation
> in the fields supplied by the kernel trace.
> 
> Note: There are few platforms that fully support this at the moment. One
> is the "Brickland Ivy Bridge -EX" (Running BIOS 0046.R04 or newer). Another
> is the "Grantley Haswell -EP" (Running BIOS 0026 ... which I don't think is
> out yet).  Be sure to enable eMCA options in BIOS setup EDKII>Advanced>System Event Log.

Patch looks correct. I'll apply it after the corresponding Kernel patch
is applied.

As I just pointed, we'll also need some code at the rasdaemon that would
associate the CPER error location data into the corresponding DIMM label.

Regards,
Mauro
> 
> -Tony
> 
> ---
> 
> 
> From: "Luck, Tony" <tony.luck@intel.com>
> 
> [RASDAEMON] Add support for extlog events reported by the kernel
> 
> Kernel may report extra platform specific information provided by
> BIOS. Capture it and log it.
> 
> TODO: log to the SQLITE data base - just top level hooks present
> in this patch.
> 
> Signed-off-by: Tony Luck <tony.luck@intel.com>
> 
> ---
> 
> diff --git a/Makefile.am b/Makefile.am
> index c1668b4d939d..2f81d777823c 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -17,13 +17,17 @@ if WITH_MCE
>  			mce-intel-dunnington.c mce-intel-tulsa.c \
>  			mce-intel-sb.c mce-intel-ivb.c
>  endif
> +if WITH_EXTLOG
> +   rasdaemon_SOURCES += ras-extlog-handler.c
> +endif
>  if WITH_ABRT_REPORT
>     rasdaemon_SOURCES += ras-report.c
>  endif
>  rasdaemon_LDADD = -lpthread $(SQLITE3_LIBS) libtrace/libtrace.a
>  
>  include_HEADERS = config.h  ras-events.h  ras-logger.h  ras-mc-handler.h \
> -		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h
> +		  ras-aer-handler.h ras-mce-handler.h ras-record.h bitfield.h ras-report.h \
> +		  ras-extlog-handler.h
>  
>  # This rule can't be called with more than one Makefile job (like make -j8)
>  # I can't figure out a way to fix that
> diff --git a/configure.ac b/configure.ac
> index 5bd824c0dbe2..b050ad864cdf 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -53,6 +53,15 @@ AS_IF([test "x$enable_mce" = "xyes"], [
>  ])
>  AM_CONDITIONAL([WITH_MCE], [test x$enable_mce = xyes])
>  
> +AC_ARG_ENABLE([extlog],
> +    AS_HELP_STRING([--enable-extlog], [enable EXTLOG events (currently experimental)]))
> +
> +AS_IF([test "x$enable_extlog" = "xyes"], [
> +  AC_DEFINE(HAVE_EXTLOG,1,"have EXTLOG events collect")
> +  AC_SUBST([WITH_EXTLOG])
> +])
> +AM_CONDITIONAL([WITH_EXTLOG], [test x$enable_extlog = xyes])
> +
>  AC_ARG_ENABLE([abrt_report],
>      AS_HELP_STRING([--enable-abrt-report], [enable report event to ABRT (currently experimental)]))
>  
> diff --git a/ras-events.c b/ras-events.c
> index ecbbd3afa66d..90ea3727f667 100644
> --- a/ras-events.c
> +++ b/ras-events.c
> @@ -30,6 +30,7 @@
>  #include "ras-mc-handler.h"
>  #include "ras-aer-handler.h"
>  #include "ras-mce-handler.h"
> +#include "ras-extlog-handler.h"
>  #include "ras-record.h"
>  #include "ras-logger.h"
>  
> @@ -203,6 +204,10 @@ int toggle_ras_mc_event(int enable)
>  	rc |= __toggle_ras_mc_event(ras, "mce", "mce_record", enable);
>  #endif
>  
> +#ifdef HAVE_EXTLOG
> +	rc |= __toggle_ras_mc_event(ras, "ras", "extlog_mem_event", enable);
> +#endif
> +
>  free_ras:
>  	free(ras);
>  	return rc;
> @@ -688,6 +693,17 @@ int handle_ras_events(int record_events)
>  		    "mce", "mce_record");
>  	}
>  #endif
> +
> +#ifdef HAVE_EXTLOG
> +	rc = add_event_handler(ras, pevent, page_size, "ras", "extlog_mem_event",
> +			       ras_extlog_mem_event_handler);
> +	if (!rc)
> +		num_events++;
> +	else
> +		log(ALL, LOG_ERR, "Can't get traces from %s:%s\n",
> +		    "ras", "aer_event");
> +#endif
> +
>  	if (!num_events) {
>  		log(ALL, LOG_INFO,
>  		    "Failed to trace all supported RAS events. Aborting.\n");
> diff --git a/ras-extlog-handler.c b/ras-extlog-handler.c
> new file mode 100644
> index 000000000000..1166a5341703
> --- /dev/null
> +++ b/ras-extlog-handler.c
> @@ -0,0 +1,134 @@
> +/*
> + * Copyright (C) 2014 Tony Luck <tony.luck@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +*/
> +#include <ctype.h>
> +#include <errno.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <stdint.h>
> +#include "libtrace/kbuffer.h"
> +#include "ras-extlog-handler.h"
> +#include "ras-record.h"
> +#include "ras-logger.h"
> +#include "ras-report.h"
> +
> +static char *err_type(int etype)
> +{
> +	switch (etype) {
> +	case 0: return "unknown";
> +	case 1: return "no error";
> +	case 2: return "single-bit ECC";
> +	case 3: return "multi-bit ECC";
> +	case 4: return "single-symbol chipkill ECC";
> +	case 5: return "multi-symbol chipkill ECC";
> +	case 6: return "master abort";
> +	case 7: return "target abort";
> +	case 8: return "parity error";
> +	case 9: return "watchdog timeout";
> +	case 10: return "invalid address";
> +	case 11: return "mirror Broken";
> +	case 12: return "memory sparing";
> +	case 13: return "scrub corrected error";
> +	case 14: return "scrub uncorrected error";
> +	case 15: return "physical memory map-out event";
> +	}
> +	return "unknown-type";
> +}
> +
> +static char *err_severity(int severity)
> +{
> +	switch (severity) {
> +	case 0: return "recoverable";
> +	case 1: return "fatal";
> +	case 2: return "corrected";
> +	case 3: return "informational";
> +	}
> +	return "unknown-severity";
> +}
> +
> +static void report_extlog_mem_event(struct ras_events *ras,
> +				    struct pevent_record *record,
> +				    struct trace_seq *s,
> +				    struct ras_extlog_event *ev)
> +{
> +	trace_seq_printf(s, "%d %s error: %s %s physical addr: 0x%llx (%s %s)",
> +		ev->error_count, err_severity(ev->severity),
> +		err_type(ev->etype), ev->dimm_info, ev->address,
> +		ev->mem_loc, ev->fru);
> +}
> +
> +int ras_extlog_mem_event_handler(struct trace_seq *s,
> +			  struct pevent_record *record,
> +			  struct event_format *event, void *context)
> +{
> +	int len;
> +	unsigned long long val;
> +	struct ras_events *ras = context;
> +	time_t now;
> +	struct tm *tm;
> +	struct ras_extlog_event ev;
> +
> +	/*
> +	 * Newer kernels (3.10-rc1 or upper) provide an uptime clock.
> +	 * On previous kernels, the way to properly generate an event would
> +	 * be to inject a fake one, measure its timestamp and diff it against
> +	 * gettimeofday. We won't do it here. Instead, let's use uptime,
> +	 * falling-back to the event report's time, if "uptime" clock is
> +	 * not available (legacy kernels).
> +	 */
> +
> +	if (ras->use_uptime)
> +		now = record->ts/1000000000L + ras->uptime_diff;
> +	else
> +		now = time(NULL);
> +
> +	tm = localtime(&now);
> +	if (tm)
> +		strftime(ev.timestamp, sizeof(ev.timestamp),
> +			 "%Y-%m-%d %H:%M:%S %z", tm);
> +	trace_seq_printf(s, "%s ", ev.timestamp);
> +
> +	if (pevent_get_field_val(s,  event, "etype", record, &val, 1) < 0)
> +		return -1;
> +	ev.etype = val;
> +	if (pevent_get_field_val(s,  event, "error_count", record, &val, 1) < 0)
> +		return -1;
> +	ev.error_count = val;
> +	if (pevent_get_field_val(s,  event, "severity", record, &val, 1) < 0)
> +		return -1;
> +	ev.severity = val;
> +	if (pevent_get_field_val(s,  event, "paddr", record, &val, 1) < 0)
> +		return -1;
> +	ev.address = val;
> +
> +	ev.dimm_info = pevent_get_field_raw(s, event, "dimm_info",
> +					   record, &len, 1);
> +	ev.mem_loc = pevent_get_field_raw(s, event, "mem_loc",
> +					   record, &len, 1);
> +	ev.fru = pevent_get_field_raw(s, event, "fru",
> +					   record, &len, 1);
> +
> +	report_extlog_mem_event(ras, record, s, &ev);
> +
> +#ifdef HAVE_SQLITE3
> +	ras_store_extlog_mem_record(ras, &ev);
> +#endif
> +
> +	return 0;
> +}
> diff --git a/ras-extlog-handler.h b/ras-extlog-handler.h
> new file mode 100644
> index 000000000000..54e8cec93af9
> --- /dev/null
> +++ b/ras-extlog-handler.h
> @@ -0,0 +1,31 @@
> +/*
> + * Copyright (C) 2014 Tony Luck <tony.luck@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> +*/
> +
> +#ifndef __RAS_EXTLOG_HANDLER_H
> +#define __RAS_EXTLOG_HANDLER_H
> +
> +#include <stdint.h>
> +
> +#include "ras-events.h"
> +#include "libtrace/event-parse.h"
> +
> +extern int ras_extlog_mem_event_handler(struct trace_seq *s,
> +			  struct pevent_record *record,
> +			  struct event_format *event, void *context);
> +
> +#endif
> diff --git a/ras-record.c b/ras-record.c
> index daa3cb102883..6b4cf548bb46 100644
> --- a/ras-record.c
> +++ b/ras-record.c
> @@ -157,6 +157,28 @@ int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev)
>  }
>  #endif
>  
> +#ifdef HAVE_EXTLOG
> +static const struct db_fields extlog_event_fields[] = {
> +		{ .name="id",			.type="INTEGER PRIMARY KEY" },
> +		{ .name="timestamp",		.type="TEXT" },
> +};
> +
> +static const struct db_table_descriptor extlog_event_tab = {
> +	.name = "extlog_event",
> +	.fields = extlog_event_fields,
> +	.num_fields = ARRAY_SIZE(extlog_event_fields),
> +};
> +
> +int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev)
> +{
> +
> +	/*
> +	 * TODO: Define the right fields above (extlog_event_fields[]) and
> +	 * add code here to save them to the database
> +	 */
> +	return 0;
> +}
> +#endif
>  
>  /*
>   * Table and functions to handle mce:mce_record
> @@ -384,6 +406,13 @@ int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras)
>  					 &aer_event_tab);
>  #endif
>  
> +#ifdef HAVE_EXTLOG
> +	rc = ras_mc_create_table(priv, &extlog_event_tab);
> +	if (rc == SQLITE_OK)
> +		rc = ras_mc_prepare_stmt(priv, &priv->stmt_extlog_record,
> +					 &extlog_event_tab);
> +#endif
> +
>  #ifdef HAVE_MCE
>  	rc = ras_mc_create_table(priv, &mce_record_tab);
>  	if (rc == SQLITE_OK)
> diff --git a/ras-record.h b/ras-record.h
> index 6f146a875b8e..715bfbbedacc 100644
> --- a/ras-record.h
> +++ b/ras-record.h
> @@ -40,8 +40,20 @@ struct ras_aer_event {
>  	const char *msg;
>  };
>  
> +struct ras_extlog_event {
> +	char timestamp[64];
> +	int etype;
> +	int error_count;
> +	int severity;
> +	unsigned long long address;
> +	const char *dimm_info;
> +	const char *mem_loc;
> +	const char *fru;
> +};
> +
>  struct ras_mc_event;
>  struct ras_aer_event;
> +struct ras_extlog_event;
>  struct mce_event;
>  
>  #ifdef HAVE_SQLITE3
> @@ -57,18 +69,23 @@ struct sqlite3_priv {
>  #ifdef HAVE_MCE
>  	sqlite3_stmt	*stmt_mce_record;
>  #endif
> +#ifdef HAVE_EXTLOG
> +	sqlite3_stmt	*stmt_extlog_record;
> +#endif
>  };
>  
>  int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras);
>  int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev);
>  int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev);
>  int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev);
> +int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev);
>  
>  #else
>  static inline int ras_mc_event_opendb(unsigned cpu, struct ras_events *ras) { return 0; };
>  static inline int ras_store_mc_event(struct ras_events *ras, struct ras_mc_event *ev) { return 0; };
>  static inline int ras_store_aer_event(struct ras_events *ras, struct ras_aer_event *ev) { return 0; };
>  static inline int ras_store_mce_record(struct ras_events *ras, struct mce_event *ev) { return 0; };
> +static inline int ras_store_extlog_mem_record(struct ras_events *ras, struct ras_extlog_event *ev) { return 0; };
>  
>  #endif
>  


-- 

Cheers,
Mauro

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-04  9:23 ` [PATCH 2/2] trace, RAS: Add eMCA trace event interface Chen, Gong
@ 2014-03-07 11:44   ` Borislav Petkov
  2014-03-10  8:22     ` Chen, Gong
  0 siblings, 1 reply; 26+ messages in thread
From: Borislav Petkov @ 2014-03-07 11:44 UTC (permalink / raw)
  To: Chen, Gong; +Cc: tony.luck, m.chehab, arozansk, linux-acpi

On Tue, Mar 04, 2014 at 04:23:17AM -0500, Chen, Gong wrote:
> Add trace interface to elaborate all H/W error related information.
> 
> Signed-off-by: Chen, Gong <gong.chen@linux.intel.com>
> ---
>  drivers/acpi/Kconfig        |   3 +-
>  drivers/acpi/Makefile       |   1 +
>  drivers/acpi/acpi_extlog.c  | 131 +++++++++++++++++++++++++++++++++++++++++++-
>  drivers/firmware/efi/cper.c |  13 ++++-
>  include/linux/cper.h        |   2 +
>  include/ras/ras_event.h     |  62 +++++++++++++++++++++
>  kernel/trace/ras-traces.c   |   1 +
>  7 files changed, 208 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
> index 4770de5..3e569d4 100644
> --- a/drivers/acpi/Kconfig
> +++ b/drivers/acpi/Kconfig
> @@ -363,6 +363,7 @@ config ACPI_EXTLOG
>  
>  	  Enhanced MCA Logging allows firmware to provide additional error
>  	  information to system software, synchronous with MCE or CMCI. This
> -	  driver adds support for that functionality.
> +	  driver adds support for that functionality with corresponding
> +	  tracepoint which carries that information to userspace.
>  
>  endif	# ACPI
> diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile
> index 0331f91..f6abc4a 100644
> --- a/drivers/acpi/Makefile
> +++ b/drivers/acpi/Makefile
> @@ -82,4 +82,5 @@ obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
>  
>  obj-$(CONFIG_ACPI_APEI)		+= apei/
>  
> +CFLAGS_acpi_extlog.o := -I$(src)
>  obj-$(CONFIG_ACPI_EXTLOG)	+= acpi_extlog.o
> diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
> index c4a5d87..fbdebad 100644
> --- a/drivers/acpi/acpi_extlog.c
> +++ b/drivers/acpi/acpi_extlog.c
> @@ -14,8 +14,10 @@
>  #include <linux/edac.h>
>  #include <asm/cpu.h>
>  #include <asm/mce.h>
> +#include <linux/dmi.h>
>  
>  #include "apei/apei-internal.h"
> +#include <ras/ras_event.h>
>  
>  #define EXT_ELOG_ENTRY_MASK	GENMASK_ULL(51, 0) /* elog entry address mask */
>  
> @@ -44,6 +46,11 @@ struct extlog_l1_head {
>  static int old_edac_report_status;
>  
>  static u8 extlog_dsm_uuid[] __initdata = "663E35AF-CC10-41A4-88EA-5470AF055295";
> +static const uuid_le invalid_uuid = NULL_UUID_LE;
> +
> +static DEFINE_RAW_SPINLOCK(trace_lock);
> +static char mem_location[LOC_LEN];
> +static char dimm_location[LOC_LEN];
>  
>  /* L1 table related physical address */
>  static u64 elog_base;
> @@ -69,6 +76,106 @@ static u32 l1_percpu_entry;
>  #define ELOG_ENTRY_ADDR(phyaddr) \
>  	(phyaddr - elog_base + (u8 *)elog_addr)
>  
> +static void mem_err_location(struct cper_sec_mem_err *mem)
> +{
> +	char *p;
> +	u32 n = 0;
> +
> +	memset(mem_location, 0, LOC_LEN);
> +	p = mem_location;
> +	if (mem->validation_bits & CPER_MEM_VALID_NODE)
> +		n += sprintf(p + n, " node: %d", mem->node);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_CARD)
> +		n += sprintf(p + n, " card: %d", mem->card);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
> +		n += sprintf(p + n, " module: %d", mem->module);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
> +		n += sprintf(p + n, " rank: %d", mem->rank);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_BANK)
> +		n += sprintf(p + n, " bank: %d", mem->bank);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
> +		n += sprintf(p + n, " device: %d", mem->device);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_ROW)
> +		n += sprintf(p + n, " row: %d", mem->row);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
> +		n += sprintf(p + n, " column: %d", mem->column);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
> +		n += sprintf(p + n, " bit_position: %d", mem->bit_pos);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
> +		n += sprintf(p + n, " requestor_id: 0x%016llx",
> +				mem->requestor_id);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
> +		n += sprintf(p + n, " responder_id: 0x%016llx",
> +				mem->responder_id);
> +	if (n >= LOC_LEN)
> +		goto end;
> +	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
> +		n += sprintf(p + n, " target_id: 0x%016llx", mem->target_id);
> +end:
> +	return;
> +}

Looks like this wants to share with cper_print_mem() - definitely a lot
of duplication there.

> +
> +static void dimm_err_location(struct cper_sec_mem_err *mem)
> +{
> +	const char *bank = NULL, *device = NULL;
> +
> +	memset(dimm_location, 0, LOC_LEN);
> +	if (!(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
> +		return;
> +
> +	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
> +	if (bank != NULL && device != NULL)
> +		snprintf(dimm_location, LOC_LEN - 1, "%s %s", bank, device);
> +	else
> +		snprintf(dimm_location, LOC_LEN - 1, "DMI handle: 0x%.4x",
> +			 mem->mem_dev_handle);
> +}

This one too.

> +
> +static void trace_mem_error(const uuid_le *fru_id, char *fru_text,
> +			    u64 err_count, u32 severity,
> +			    struct cper_sec_mem_err *mem)
> +{
> +	u32 etype = ~0U;
> +	u64 phy_addr = ~0ull;

I'm assuming userspace knows that all 1s means field value is invalid?

> +	unsigned long flags;
> +
> +	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
> +		etype = mem->error_type;

newline.

> +	if (mem->validation_bits & CPER_MEM_VALID_PA) {
> +		phy_addr = mem->physical_addr;
> +		if (mem->validation_bits & CPER_MEM_VALID_PA_MASK)
> +			phy_addr &= mem->physical_addr_mask;
> +	}
> +
> +	raw_spin_lock_irqsave(&trace_lock, flags);
> +	mem_err_location(mem);
> +	dimm_err_location(mem);
> +
> +	trace_extlog_mem_event(etype, dimm_location, fru_id, fru_text,
> +			       err_count, severity, phy_addr, mem_location);
> +	raw_spin_unlock_irqrestore(&trace_lock, flags);
> +}
> +
>  static struct acpi_generic_status *extlog_elog_entry_check(int cpu, int bank)
>  {
>  	int idx;
> @@ -137,7 +244,12 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
>  	struct mce *mce = (struct mce *)data;
>  	int	bank = mce->bank;
>  	int	cpu = mce->extcpu;
> -	struct acpi_generic_status *estatus;
> +	struct acpi_generic_status *estatus, *tmp;
> +	struct acpi_generic_data *gdata;
> +	const uuid_le *fru_id = &invalid_uuid;
> +	char *fru_text = "";
> +	uuid_le *sec_type;
> +	static u64 err_count;
>  	int rc;
>  
>  	estatus = extlog_elog_entry_check(cpu, bank);
> @@ -149,6 +261,23 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
>  	estatus->block_status = 0;
>  
>  	rc = print_extlog_rcd(NULL, (struct acpi_generic_status *)elog_buf, cpu);
> +	tmp = (struct acpi_generic_status *)elog_buf;
> +	gdata = (struct acpi_generic_data *)(tmp + 1);
> +	rc = print_extlog_rcd(NULL, tmp, cpu);

We probably need a mechanism to disable printking to dmesg once
userspace has opened the tracepoint.

> +	/* trace extended error log */
> +	err_count++;
> +	if (gdata->validation_bits & CPER_SEC_VALID_FRU_ID)
> +		fru_id = (uuid_le *)gdata->fru_id;
> +	if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
> +		fru_text = gdata->fru_text;
> +	sec_type = (uuid_le *)gdata->section_type;
> +	if (!uuid_le_cmp(*sec_type, CPER_SEC_PLATFORM_MEM)) {
> +		struct cper_sec_mem_err *mem_err = (void *)(gdata + 1);
> +		if (gdata->error_data_length >= sizeof(*mem_err))
> +			trace_mem_error(fru_id, fru_text, err_count,
> +					gdata->error_severity, mem_err);
> +	}
>  
>  	return NOTIFY_STOP;
>  }
> diff --git a/drivers/firmware/efi/cper.c b/drivers/firmware/efi/cper.c
> index 1491dd4..9d3e2c4 100644
> --- a/drivers/firmware/efi/cper.c
> +++ b/drivers/firmware/efi/cper.c
> @@ -57,11 +57,12 @@ static const char *cper_severity_strs[] = {
>  	"info",
>  };
>  
> -static const char *cper_severity_str(unsigned int severity)
> +const char *cper_severity_str(unsigned int severity)
>  {
>  	return severity < ARRAY_SIZE(cper_severity_strs) ?
>  		cper_severity_strs[severity] : "unknown";
>  }
> +EXPORT_SYMBOL_GPL(cper_severity_str);

Yes, this calls for a common file sharin cper and extlog functionality.

>  /*
>   * cper_print_bits - print strings for set bits
> @@ -196,6 +197,13 @@ static const char *cper_mem_err_type_strs[] = {
>  	"physical memory map-out event",
>  };
>  
> +const char *cper_mem_err_type_str(unsigned int etype)
> +{
> +	return etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
> +		cper_mem_err_type_strs[etype] : "unknown";
> +}
> +EXPORT_SYMBOL_GPL(cper_mem_err_type_str);
> +
>  static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
>  {
>  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
> @@ -233,8 +241,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
>  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
>  		u8 etype = mem->error_type;
>  		printk("%s""error_type: %d, %s\n", pfx, etype,
> -		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
> -		       cper_mem_err_type_strs[etype] : "unknown");
> +			cper_mem_err_type_str(etype));
>  	}
>  	if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
>  		const char *bank = NULL, *device = NULL;

Ditto.

> diff --git a/include/linux/cper.h b/include/linux/cper.h
> index 2fc0ec3..c6d87fc 100644
> --- a/include/linux/cper.h
> +++ b/include/linux/cper.h
> @@ -395,6 +395,8 @@ struct cper_sec_pcie {
>  #pragma pack()
>  
>  u64 cper_next_record_id(void);
> +const char *cper_severity_str(unsigned int);
> +const char *cper_mem_err_type_str(unsigned int);
>  void cper_print_bits(const char *prefix, unsigned int bits,
>  		     const char * const strs[], unsigned int strs_size);
>  
> diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
> index 21cdb0b..97f2192 100644
> --- a/include/ras/ras_event.h
> +++ b/include/ras/ras_event.h
> @@ -8,6 +8,68 @@
>  #include <linux/tracepoint.h>
>  #include <linux/edac.h>
>  #include <linux/ktime.h>
> +#include <linux/cper.h>
> +
> +/*
> + * MCE Extended Error Log trace event
> + *
> + * These events are generated when hardware detects a corrected or
> + * uncorrected event.
> + *
> + */
> +
> +/* memory trace event */
> +
> +#define LOC_LEN		512
> +
> +TRACE_EVENT(extlog_mem_event,

So this is a mem thing so we're defining a tracepoint for memory events,
specifically.

However, if extlog carries all kinds of errors outside, not only DRAM
errors, we should do a TRACE_EVENT_CLASS which contains the shared args
to every error type and then make a mem event ontop of it.

> +	TP_PROTO(u32 etype,
> +		 char *dimm_info,
> +		 const uuid_le *fru_id,
> +		 char *fru_text,
> +		 u64 error_count,
> +		 u32 severity,
> +		 u64 phy_addr,
> +		 char *mem_loc),
> +
> +	TP_ARGS(etype, dimm_info, fru_id, fru_text, error_count, severity,
> +		phy_addr, mem_loc),
> +
> +	TP_STRUCT__entry(
> +		__field(u32, etype)
> +		__dynamic_array(char, dimm_info, LOC_LEN)
> +		__field(u64, error_count)
> +		__field(u32, severity)
> +		__field(u64, paddr)
> +		__string(mem_loc, mem_loc)
> +		__dynamic_array(char, fru, LOC_LEN)
> +	),
> +
> +	TP_fast_assign(
> +		__entry->error_count = error_count;
> +		__entry->severity = severity;
> +		__entry->etype = etype;
> +		if (dimm_info[0] != '\0')
> +			snprintf(__get_dynamic_array(dimm_info), LOC_LEN - 1,
> +				 "%s", dimm_info);
> +		else
> +			__assign_str(dimm_info, "");
> +		__entry->paddr = phy_addr;
> +		__assign_str(mem_loc, mem_loc);
> +		snprintf(__get_dynamic_array(fru), LOC_LEN - 1,
> +			 "FRU: %pUl %.20s", fru_id, fru_text);
> +	),
> +
> +	TP_printk("%llu %s error%s: %s %s physical addr: 0x%016llx%s %s",
> +		  __entry->error_count,
> +		  cper_severity_str(__entry->severity),
> +		  __entry->error_count > 1 ? "s" : "",
> +		  cper_mem_err_type_str(__entry->etype),
> +		  __get_str(dimm_info),
> +		  __entry->paddr,
> +		  __get_str(mem_loc),
> +		  __get_str(fru))
> +);
>  
>  /*
>   * Hardware Events Report
> diff --git a/kernel/trace/ras-traces.c b/kernel/trace/ras-traces.c
> index b0c6ed1..197b1ea 100644
> --- a/kernel/trace/ras-traces.c
> +++ b/kernel/trace/ras-traces.c
> @@ -9,4 +9,5 @@
>  #define TRACE_INCLUDE_PATH ../../include/ras
>  #include <ras/ras_event.h>
>  
> +EXPORT_TRACEPOINT_SYMBOL_GPL(extlog_mem_event);
>  EXPORT_TRACEPOINT_SYMBOL_GPL(mc_event);
> -- 
> 1.8.4.3
> 
> 

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-07 11:44   ` Borislav Petkov
@ 2014-03-10  8:22     ` Chen, Gong
  2014-03-10 10:04       ` Mauro Carvalho Chehab
                         ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Chen, Gong @ 2014-03-10  8:22 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: tony.luck, m.chehab, arozansk, linux-acpi

[-- Attachment #1: Type: text/plain, Size: 5163 bytes --]

On Fri, Mar 07, 2014 at 12:44:16PM +0100, Borislav Petkov wrote:
[...]
> > +static void mem_err_location(struct cper_sec_mem_err *mem)
> > +{
> > +	char *p;
> > +	u32 n = 0;
> > +
> > +	memset(mem_location, 0, LOC_LEN);
> > +	p = mem_location;
> > +	if (mem->validation_bits & CPER_MEM_VALID_NODE)
> > +		n += sprintf(p + n, " node: %d", mem->node);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_CARD)
> > +		n += sprintf(p + n, " card: %d", mem->card);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
> > +		n += sprintf(p + n, " module: %d", mem->module);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
> > +		n += sprintf(p + n, " rank: %d", mem->rank);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_BANK)
> > +		n += sprintf(p + n, " bank: %d", mem->bank);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
> > +		n += sprintf(p + n, " device: %d", mem->device);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_ROW)
> > +		n += sprintf(p + n, " row: %d", mem->row);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
> > +		n += sprintf(p + n, " column: %d", mem->column);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
> > +		n += sprintf(p + n, " bit_position: %d", mem->bit_pos);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
> > +		n += sprintf(p + n, " requestor_id: 0x%016llx",
> > +				mem->requestor_id);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
> > +		n += sprintf(p + n, " responder_id: 0x%016llx",
> > +				mem->responder_id);
> > +	if (n >= LOC_LEN)
> > +		goto end;
> > +	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
> > +		n += sprintf(p + n, " target_id: 0x%016llx", mem->target_id);
> > +end:
> > +	return;
> > +}
> 
> Looks like this wants to share with cper_print_mem() - definitely a lot
> of duplication there.
> 
> > +
> > +static void dimm_err_location(struct cper_sec_mem_err *mem)
> > +{
> > +	const char *bank = NULL, *device = NULL;
> > +
> > +	memset(dimm_location, 0, LOC_LEN);
> > +	if (!(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
> > +		return;
> > +
> > +	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
> > +	if (bank != NULL && device != NULL)
> > +		snprintf(dimm_location, LOC_LEN - 1, "%s %s", bank, device);
> > +	else
> > +		snprintf(dimm_location, LOC_LEN - 1, "DMI handle: 0x%.4x",
> > +			 mem->mem_dev_handle);
> > +}
> 
> This one too.
> 
Not really. Firstly they service for different purpose. Secondly the
format here can be changed/updated depending on further requirment.
I can't assume they always keep the same format.

> > +
> > +static void trace_mem_error(const uuid_le *fru_id, char *fru_text,
> > +			    u64 err_count, u32 severity,
> > +			    struct cper_sec_mem_err *mem)
> > +{
> > +	u32 etype = ~0U;
> > +	u64 phy_addr = ~0ull;
> 
> I'm assuming userspace knows that all 1s means field value is invalid?
Yep, I suppose so.

> 
> > +	unsigned long flags;
> > +
> > +	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
> > +		etype = mem->error_type;
> 
> newline.
Sure.

[...]
> We probably need a mechanism to disable printking to dmesg once
> userspace has opened the tracepoint.
Do we really need to do that? IMHO, I think they are used for two different
usages, just like dmesg & mcelog.

[...]
> >  static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
> >  {
> >  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
> > @@ -233,8 +241,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
> >  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
> >  		u8 etype = mem->error_type;
> >  		printk("%s""error_type: %d, %s\n", pfx, etype,
> > -		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
> > -		       cper_mem_err_type_strs[etype] : "unknown");
> > +			cper_mem_err_type_str(etype));
> >  	}
> >  	if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
> >  		const char *bank = NULL, *device = NULL;
> 
> Ditto.
I know you hope the print function in CPER & trace for cpi_extlog can be
merged into one. I just have one concern about it. Can we ensure these
two functions keeping align all the time? IOW, merge them for now until
change happens one day?

[...]
> > +#define LOC_LEN		512
> > +
> > +TRACE_EVENT(extlog_mem_event,
> 
> So this is a mem thing so we're defining a tracepoint for memory events,
> specifically.
> 
> However, if extlog carries all kinds of errors outside, not only DRAM
> errors, we should do a TRACE_EVENT_CLASS which contains the shared args
> to every error type and then make a mem event ontop of it.
I agree.


[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-10  8:22     ` Chen, Gong
@ 2014-03-10 10:04       ` Mauro Carvalho Chehab
  2014-03-10 10:31         ` Borislav Petkov
  2014-03-10 10:33       ` Borislav Petkov
  2014-03-10 17:42       ` Luck, Tony
  2 siblings, 1 reply; 26+ messages in thread
From: Mauro Carvalho Chehab @ 2014-03-10 10:04 UTC (permalink / raw)
  To: Chen, Gong; +Cc: Borislav Petkov, tony.luck, arozansk, linux-acpi

Em Mon, 10 Mar 2014 04:22:42 -0400
"Chen, Gong" <gong.chen@linux.intel.com> escreveu:

> On Fri, Mar 07, 2014 at 12:44:16PM +0100, Borislav Petkov wrote:
> [...]
> > > +static void mem_err_location(struct cper_sec_mem_err *mem)
> > > +{
> > > +	char *p;
> > > +	u32 n = 0;
> > > +
> > > +	memset(mem_location, 0, LOC_LEN);
> > > +	p = mem_location;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_NODE)
> > > +		n += sprintf(p + n, " node: %d", mem->node);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_CARD)
> > > +		n += sprintf(p + n, " card: %d", mem->card);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_MODULE)
> > > +		n += sprintf(p + n, " module: %d", mem->module);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_RANK_NUMBER)
> > > +		n += sprintf(p + n, " rank: %d", mem->rank);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_BANK)
> > > +		n += sprintf(p + n, " bank: %d", mem->bank);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_DEVICE)
> > > +		n += sprintf(p + n, " device: %d", mem->device);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_ROW)
> > > +		n += sprintf(p + n, " row: %d", mem->row);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_COLUMN)
> > > +		n += sprintf(p + n, " column: %d", mem->column);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_BIT_POSITION)
> > > +		n += sprintf(p + n, " bit_position: %d", mem->bit_pos);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_REQUESTOR_ID)
> > > +		n += sprintf(p + n, " requestor_id: 0x%016llx",
> > > +				mem->requestor_id);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_RESPONDER_ID)
> > > +		n += sprintf(p + n, " responder_id: 0x%016llx",
> > > +				mem->responder_id);
> > > +	if (n >= LOC_LEN)
> > > +		goto end;
> > > +	if (mem->validation_bits & CPER_MEM_VALID_TARGET_ID)
> > > +		n += sprintf(p + n, " target_id: 0x%016llx", mem->target_id);
> > > +end:
> > > +	return;
> > > +}
> > 
> > Looks like this wants to share with cper_print_mem() - definitely a lot
> > of duplication there.
> > 
> > > +
> > > +static void dimm_err_location(struct cper_sec_mem_err *mem)
> > > +{
> > > +	const char *bank = NULL, *device = NULL;
> > > +
> > > +	memset(dimm_location, 0, LOC_LEN);
> > > +	if (!(mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE))
> > > +		return;
> > > +
> > > +	dmi_memdev_name(mem->mem_dev_handle, &bank, &device);
> > > +	if (bank != NULL && device != NULL)
> > > +		snprintf(dimm_location, LOC_LEN - 1, "%s %s", bank, device);
> > > +	else
> > > +		snprintf(dimm_location, LOC_LEN - 1, "DMI handle: 0x%.4x",
> > > +			 mem->mem_dev_handle);
> > > +}
> > 
> > This one too.
> > 
> Not really. Firstly they service for different purpose. Secondly the
> format here can be changed/updated depending on further requirment.
> I can't assume they always keep the same format.

Changing the format breaks any userspace application that relies on
parsing them. That's an API breakage. Adding more data could be
fine, if we take enough care when doing it, and properly document
how userspace is supposed to parse it.

> > > +
> > > +static void trace_mem_error(const uuid_le *fru_id, char *fru_text,
> > > +			    u64 err_count, u32 severity,
> > > +			    struct cper_sec_mem_err *mem)
> > > +{
> > > +	u32 etype = ~0U;
> > > +	u64 phy_addr = ~0ull;
> > 
> > I'm assuming userspace knows that all 1s means field value is invalid?
> Yep, I suppose so.

Well, actually, EDAC drivers use 0 to indicate an unknown physical address.
The better is to use the same standard used there.

See the code at ghes_edac.c:

	/* Cleans the error report buffer */
	memset(e, 0, sizeof (*e));
	e->error_count = 1;
	strcpy(e->label, "unknown label");
	e->msg = pvt->msg;
	e->other_detail = pvt->other_detail;
	e->top_layer = -1;
	e->mid_layer = -1;
	e->low_layer = -1;
	*pvt->other_detail = '\0';
	*pvt->msg = '\0';

> 
> > 
> > > +	unsigned long flags;
> > > +
> > > +	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE)
> > > +		etype = mem->error_type;
> > 
> > newline.
> Sure.
> 
> [...]
> > We probably need a mechanism to disable printking to dmesg once
> > userspace has opened the tracepoint.
> Do we really need to do that? IMHO, I think they are used for two different
> usages, just like dmesg & mcelog.
> 
> [...]
> > >  static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
> > >  {
> > >  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_STATUS)
> > > @@ -233,8 +241,7 @@ static void cper_print_mem(const char *pfx, const struct cper_sec_mem_err *mem)
> > >  	if (mem->validation_bits & CPER_MEM_VALID_ERROR_TYPE) {
> > >  		u8 etype = mem->error_type;
> > >  		printk("%s""error_type: %d, %s\n", pfx, etype,
> > > -		       etype < ARRAY_SIZE(cper_mem_err_type_strs) ?
> > > -		       cper_mem_err_type_strs[etype] : "unknown");
> > > +			cper_mem_err_type_str(etype));
> > >  	}
> > >  	if (mem->validation_bits & CPER_MEM_VALID_MODULE_HANDLE) {
> > >  		const char *bank = NULL, *device = NULL;
> > 
> > Ditto.
> I know you hope the print function in CPER & trace for cpi_extlog can be
> merged into one. I just have one concern about it. Can we ensure these
> two functions keeping align all the time? IOW, merge them for now until
> change happens one day?

IMHO, that's the best.

> [...]
> > > +#define LOC_LEN		512
> > > +
> > > +TRACE_EVENT(extlog_mem_event,
> > 
> > So this is a mem thing so we're defining a tracepoint for memory events,
> > specifically.
> > 
> > However, if extlog carries all kinds of errors outside, not only DRAM
> > errors, we should do a TRACE_EVENT_CLASS which contains the shared args
> > to every error type and then make a mem event ontop of it.
> I agree.

-- 

Regards,
Mauro

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-10 10:04       ` Mauro Carvalho Chehab
@ 2014-03-10 10:31         ` Borislav Petkov
  2014-03-10 11:41           ` Mauro Carvalho Chehab
  0 siblings, 1 reply; 26+ messages in thread
From: Borislav Petkov @ 2014-03-10 10:31 UTC (permalink / raw)
  To: Mauro Carvalho Chehab, Chen, Gong; +Cc: tony.luck, arozansk, linux-acpi

On Mon, Mar 10, 2014 at 07:04:35AM -0300, Mauro Carvalho Chehab wrote:
> Changing the format breaks any userspace application that relies on
> parsing them. That's an API breakage. Adding more data could be
> fine, if we take enough care when doing it, and properly document
> how userspace is supposed to parse it.

Yes, we don't want code duplication if it can be helped. Besides, having
one function doing the error format issual keeps us from the case when
having two or more diverge from one another.

> Well, actually, EDAC drivers use 0 to indicate an unknown physical address.
> The better is to use the same standard used there.

However, physical address 0 is a valid address... all FFF...Fs is hardly valid.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-10  8:22     ` Chen, Gong
  2014-03-10 10:04       ` Mauro Carvalho Chehab
@ 2014-03-10 10:33       ` Borislav Petkov
  2014-03-10 17:42       ` Luck, Tony
  2 siblings, 0 replies; 26+ messages in thread
From: Borislav Petkov @ 2014-03-10 10:33 UTC (permalink / raw)
  To: Chen, Gong; +Cc: tony.luck, m.chehab, arozansk, linux-acpi

On Mon, Mar 10, 2014 at 04:22:42AM -0400, Chen, Gong wrote:
> Do we really need to do that? IMHO, I think they are used for two
> different usages, just like dmesg & mcelog.

Yes, we do. We want to be able to disable issuing errors into dmesg so
that people don't have to parse it. This is the main reason why we're
adding tracepoints - so that we have structured error format going out
of the kernel.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-10 10:31         ` Borislav Petkov
@ 2014-03-10 11:41           ` Mauro Carvalho Chehab
  2014-03-10 13:29             ` Borislav Petkov
  0 siblings, 1 reply; 26+ messages in thread
From: Mauro Carvalho Chehab @ 2014-03-10 11:41 UTC (permalink / raw)
  To: Borislav Petkov; +Cc: Chen, Gong, tony.luck, arozansk, linux-acpi

Em Mon, 10 Mar 2014 11:31:29 +0100
Borislav Petkov <bp@alien8.de> escreveu:

> On Mon, Mar 10, 2014 at 07:04:35AM -0300, Mauro Carvalho Chehab wrote:
> > Changing the format breaks any userspace application that relies on
> > parsing them. That's an API breakage. Adding more data could be
> > fine, if we take enough care when doing it, and properly document
> > how userspace is supposed to parse it.
> 
> Yes, we don't want code duplication if it can be helped. Besides, having
> one function doing the error format issual keeps us from the case when
> having two or more diverge from one another.
> 
> > Well, actually, EDAC drivers use 0 to indicate an unknown physical address.
> > The better is to use the same standard used there.
> 
> However, physical address 0 is a valid address... all FFF...Fs is hardly valid.

True, but if we decide to go on that direction, a change like that should
then be done on all EDAC drivers, and that's an API change. 

We also need to be sure that userspace will handle this change properly.

Regards,
Mauro

-- 

Regards,
Mauro

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-10 11:41           ` Mauro Carvalho Chehab
@ 2014-03-10 13:29             ` Borislav Petkov
  2014-03-10 17:37               ` Luck, Tony
  0 siblings, 1 reply; 26+ messages in thread
From: Borislav Petkov @ 2014-03-10 13:29 UTC (permalink / raw)
  To: Mauro Carvalho Chehab; +Cc: Chen, Gong, tony.luck, arozansk, linux-acpi

On Mon, Mar 10, 2014 at 08:41:13AM -0300, Mauro Carvalho Chehab wrote:
> True, but if we decide to go on that direction, a change like that
> should then be done on all EDAC drivers, and that's an API change.
>
> We also need to be sure that userspace will handle this change
> properly.

We don't have a choice:

[    0.000000] e820: BIOS-provided physical RAM map:
[    0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009e7ff] usable

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-10 13:29             ` Borislav Petkov
@ 2014-03-10 17:37               ` Luck, Tony
  2014-03-11 14:27                 ` Borislav Petkov
  0 siblings, 1 reply; 26+ messages in thread
From: Luck, Tony @ 2014-03-10 17:37 UTC (permalink / raw)
  To: Borislav Petkov, Mauro Carvalho Chehab
  Cc: Chen, Gong, arozansk@redhat.com, linux-acpi@vger.kernel.org

>> True, but if we decide to go on that direction, a change like that
>> should then be done on all EDAC drivers, and that's an API change.
>>
>> We also need to be sure that userspace will handle this change
>> properly.
>
> We don't have a choice:
>
> [    0.000000] e820: BIOS-provided physical RAM map:
> [    0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009e7ff] usable

Since we have separate trace points for EDCA and extlog - we can use different
conventions for "invalid".  If you think that changing the ABI in EDAC would be a
problem - then you can keep running the almost infinitesimal chance that you have
a real error at 0x00000000000000 that you treat as unknown address.

Does X86 actually allocate that page for use? - I know on Itanium it got thrown out
when rounding usable blocks of memory to 16MB boundaries for cache coherency
reasons.

-Tony

^ permalink raw reply	[flat|nested] 26+ messages in thread

* RE: [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-10  8:22     ` Chen, Gong
  2014-03-10 10:04       ` Mauro Carvalho Chehab
  2014-03-10 10:33       ` Borislav Petkov
@ 2014-03-10 17:42       ` Luck, Tony
  2014-03-11  7:03         ` Chen, Gong
  2 siblings, 1 reply; 26+ messages in thread
From: Luck, Tony @ 2014-03-10 17:42 UTC (permalink / raw)
  To: Chen, Gong, Borislav Petkov
  Cc: m.chehab@samsung.com, arozansk@redhat.com,
	linux-acpi@vger.kernel.org

> Not really. Firstly they service for different purpose. Secondly the
> format here can be changed/updated depending on further requirment.
> I can't assume they always keep the same format.

I can't imagine a reason why we'd want them to be different though.
If a reason does come up in the future, then we can clone & edit
the function to do different things - but until then we should share code.

Note: there will be changes periodically when UEFI changes the error
record format (they have added new fields in the past - I expect they
will add more in the future).  We should be able to add more stuff to
the end of the "mem_loc[]" string as long as we write user-mode
parsers that won't be surprised by extra fields there.

-Tony

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: trace, RAS: New eMCA trace event interface
  2014-03-07  9:10   ` Mauro Carvalho Chehab
@ 2014-03-10 18:55     ` Tony Luck
  2014-03-10 19:41       ` Mauro Carvalho Chehab
  0 siblings, 1 reply; 26+ messages in thread
From: Tony Luck @ 2014-03-10 18:55 UTC (permalink / raw)
  To: Mauro Carvalho Chehab
  Cc: Chen, Gong, Borislav Petkov, arozansk@redhat.com, linux-acpi

On Fri, Mar 7, 2014 at 1:10 AM, Mauro Carvalho Chehab
<m.chehab@samsung.com> wrote:
> As I just pointed, we'll also need some code at the rasdaemon that would
> associate the CPER error location data into the corresponding DIMM label.

In most cases the kernel will provide a label (that it got from
SMBIOS). I assume
this is for the case that the SMBIOS table is bad?

I haven't looked too hard at how EDAC does this - the "--register-labels" option
to edac-ctl?  Since we don't have a sysfs interface for extlog - perhaps any
translation can be handled entirely in rasdaemon(8).

-Tony

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: trace, RAS: New eMCA trace event interface
  2014-03-10 18:55     ` Tony Luck
@ 2014-03-10 19:41       ` Mauro Carvalho Chehab
  0 siblings, 0 replies; 26+ messages in thread
From: Mauro Carvalho Chehab @ 2014-03-10 19:41 UTC (permalink / raw)
  To: Tony Luck; +Cc: Chen, Gong, Borislav Petkov, arozansk@redhat.com, linux-acpi

Em Mon, 10 Mar 2014 11:55:24 -0700
Tony Luck <tony.luck@gmail.com> escreveu:

> On Fri, Mar 7, 2014 at 1:10 AM, Mauro Carvalho Chehab
> <m.chehab@samsung.com> wrote:
> > As I just pointed, we'll also need some code at the rasdaemon that would
> > associate the CPER error location data into the corresponding DIMM label.
> 
> In most cases the kernel will provide a label (that it got from
> SMBIOS). I assume
> this is for the case that the SMBIOS table is bad?

Well, except for reference boards, I'm yet to see such info
properly filled at the SMBIOS table that matches the names
printed at the motherboard.

> I haven't looked too hard at how EDAC does this - the "--register-labels" option
> to edac-ctl?

Right now, it uses the EDAC sysfs nodes to store the labels at
the Kernel. The advantage is that a fatal error dmesg will also
print the proper labels.

Moving it to userspace will only solve for non-fatal errors where
the rasdaemon will handle the traces.

> Since we don't have a sysfs interface for extlog - perhaps any
> translation can be handled entirely in rasdaemon(8).

That could be done, but some additional logic will be needed to
teach the rasdaemon that the information should be stored on some
database. The rasdaemon would also need to detect at runtime if
the stored data matches the machine where it is running.

Regards,
Mauro

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-10 17:42       ` Luck, Tony
@ 2014-03-11  7:03         ` Chen, Gong
  0 siblings, 0 replies; 26+ messages in thread
From: Chen, Gong @ 2014-03-11  7:03 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Borislav Petkov, m.chehab@samsung.com, arozansk@redhat.com,
	linux-acpi@vger.kernel.org

[-- Attachment #1: Type: text/plain, Size: 740 bytes --]

On Mon, Mar 10, 2014 at 05:42:45PM +0000, Luck, Tony wrote:
> I can't imagine a reason why we'd want them to be different though.
> If a reason does come up in the future, then we can clone & edit
> the function to do different things - but until then we should share code.
> 
> Note: there will be changes periodically when UEFI changes the error
> record format (they have added new fields in the past - I expect they
> will add more in the future).  We should be able to add more stuff to
> the end of the "mem_loc[]" string as long as we write user-mode
> parsers that won't be surprised by extra fields there.
> 
> -Tony

OK, I will follow you guys suggestion for next version, like code
sharing, trace/printk switching

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH 2/2] trace, RAS: Add eMCA trace event interface
  2014-03-10 17:37               ` Luck, Tony
@ 2014-03-11 14:27                 ` Borislav Petkov
  0 siblings, 0 replies; 26+ messages in thread
From: Borislav Petkov @ 2014-03-11 14:27 UTC (permalink / raw)
  To: Luck, Tony
  Cc: Mauro Carvalho Chehab, Chen, Gong, arozansk@redhat.com,
	linux-acpi@vger.kernel.org

On Mon, Mar 10, 2014 at 05:37:19PM +0000, Luck, Tony wrote:
> Since we have separate trace points for EDCA and extlog - we can use
> different conventions for "invalid". If you think that changing the
> ABI in EDAC would be a problem - then you can keep running the almost
> infinitesimal chance that you have a real error at 0x00000000000000
> that you treat as unknown address.

I hardly believe changing that would be noticed by anyone.

> Does X86 actually allocate that page for use? - I know on Itanium
> it got thrown out when rounding usable blocks of memory to 16MB
> boundaries for cache coherency reasons.

Yeah, it turns out we reserve at least the first page - 64K by default
though - because BIOS likes to make love to that range. Detailed
explanation from Kconfig text below.

Still, for correctness sake at least, I think we should use a value
which denotes an invalid memory address, i.e. ~0x0, for example, and not
a valid one.

config X86_RESERVE_LOW
	int "Amount of low memory, in kilobytes, to reserve for the BIOS"
	default 64
	range 4 640
	---help---
	  Specify the amount of low memory to reserve for the BIOS.

	  The first page contains BIOS data structures that the kernel
	  must not use, so that page must always be reserved.

	  By default we reserve the first 64K of physical RAM, as a
	  number of BIOSes are known to corrupt that memory range
	  during events such as suspend/resume or monitor cable
	  insertion, so it must not be used by the kernel.

	  You can set this to 4 if you are absolutely sure that you
	  trust the BIOS to get all its memory reservations and usages
	  right.  If you know your BIOS have problems beyond the
	  default 64K area, you can set this to 640 to avoid using the
	  entire low memory range.

	  If you have doubts about the BIOS (e.g. suspend/resume does
	  not work or there's kernel crashes after certain hardware
	  hotplug events) then you might want to enable
	  X86_CHECK_BIOS_CORRUPTION=y to allow the kernel to check
	  typical corruption patterns.

	  Leave this to the default value of 64 if you are unsure.

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2014-03-11 14:27 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-03-04  9:23 trace, RAS: New eMCA trace event interface Chen, Gong
2014-03-04  9:23 ` [PATCH 1/2] trace, RAS: Add basic RAS trace event Chen, Gong
2014-03-06 11:18   ` Borislav Petkov
2014-03-06 11:43     ` Mauro Carvalho Chehab
2014-03-06 12:17       ` Borislav Petkov
2014-03-06 13:06         ` Mauro Carvalho Chehab
2014-03-06 15:26           ` Borislav Petkov
2014-03-06 15:39             ` Mauro Carvalho Chehab
2014-03-07  6:21               ` Chen, Gong
2014-03-07  9:08                 ` Mauro Carvalho Chehab
2014-03-04  9:23 ` [PATCH 2/2] trace, RAS: Add eMCA trace event interface Chen, Gong
2014-03-07 11:44   ` Borislav Petkov
2014-03-10  8:22     ` Chen, Gong
2014-03-10 10:04       ` Mauro Carvalho Chehab
2014-03-10 10:31         ` Borislav Petkov
2014-03-10 11:41           ` Mauro Carvalho Chehab
2014-03-10 13:29             ` Borislav Petkov
2014-03-10 17:37               ` Luck, Tony
2014-03-11 14:27                 ` Borislav Petkov
2014-03-10 10:33       ` Borislav Petkov
2014-03-10 17:42       ` Luck, Tony
2014-03-11  7:03         ` Chen, Gong
2014-03-04 17:54 ` trace, RAS: New " Luck, Tony
2014-03-07  9:10   ` Mauro Carvalho Chehab
2014-03-10 18:55     ` Tony Luck
2014-03-10 19:41       ` Mauro Carvalho Chehab

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox