qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] cxl: avoid duplicating report from MCE & device
@ 2024-06-18 16:53 Shiyang Ruan via
  2024-06-18 23:35 ` Dave Jiang
                   ` (3 more replies)
  0 siblings, 4 replies; 20+ messages in thread
From: Shiyang Ruan via @ 2024-06-18 16:53 UTC (permalink / raw)
  To: qemu-devel, linux-cxl
  Cc: jonathan.cameron, dan.j.williams, dave, ira.weiny,
	alison.schofield, dave.jiang, vishal.l.verma

Background:
Since a CXL device is a memory device, when the CPU consumes a poisoned
page on a CXL device, it always triggers an MCE via interrupt (INT18), no
matter which "-First" path (FW-First or OS-First) is configured.  This is
the first report.  Currently, in the FW-First path, the poison event is
then delivered through the following chain: CXL device -> firmware ->
OS:ACPI->APEI->GHES -> CPER -> trace report.  This is the second report.
These two reports indicate the same poisoned page, which is the so-called
"duplicate report"[1].  The memory_failure() handling I'm trying to add
in the OS-First path could also become another duplicate report.

I hope the flow below makes this easier to understand:
CPU accesses bad memory on CXL device, then
 -> MCE (INT18), *always* report (1)
 -> * FW-First (implemented now)
      -> CXL device -> FW
	      -> OS:ACPI->APEI->GHES->CPER -> trace report (2.a)
    * OS-First (not implemented yet, I'm working on it)
      -> CXL device -> MSI
	      -> OS:CXL driver -> memory_failure() (2.b)
So (1) and (2.a/b) are duplicate reports of the same poison event.

(I didn't get a response to my reply in [1], but I need to make a patch
to solve this problem, so please correct me if my understanding is wrong.)

This patch adds a new notifier_block, with priority MCE_PRIO_CXL, to
`x86_mce_decoder_chain` so that the CXL memdev can check whether the
current poisoned page has already been reported.  If it has, the notifier
chain is stopped, so the following memory_failure() is not called to
report it again.  In this way, if the poisoned page has already been
handled (recorded and reported) in (1) or (2), the other path won't
duplicate the report.  The record is cleared when cxl_clear_poison() is
called.

[1] https://lore.kernel.org/linux-cxl/664d948fb86f0_e8be294f8@dwillia2-mobl3.amr.corp.intel.com.notmuch/

Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
---
 arch/x86/include/asm/mce.h |   1 +
 drivers/cxl/core/mbox.c    | 130 +++++++++++++++++++++++++++++++++++++
 drivers/cxl/core/memdev.c  |   6 +-
 drivers/cxl/cxlmem.h       |   3 +
 4 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index dfd2e9699bd7..d8109c48e7d9 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -182,6 +182,7 @@ enum mce_notifier_prios {
 	MCE_PRIO_NFIT,
 	MCE_PRIO_EXTLOG,
 	MCE_PRIO_UC,
+	MCE_PRIO_CXL,
 	MCE_PRIO_EARLY,
 	MCE_PRIO_CEC,
 	MCE_PRIO_HIGHEST = MCE_PRIO_CEC
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 2626f3fff201..0eb3c5401e81 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -4,6 +4,8 @@
 #include <linux/debugfs.h>
 #include <linux/ktime.h>
 #include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <asm/mce.h>
 #include <asm/unaligned.h>
 #include <cxlpci.h>
 #include <cxlmem.h>
@@ -880,6 +882,9 @@ void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
 		if (cxlr)
 			hpa = cxl_trace_hpa(cxlr, cxlmd, dpa);
 
+		if (hpa != ULLONG_MAX && cxl_mce_recorded(hpa))
+			return;
+
 		if (event_type == CXL_CPER_EVENT_GEN_MEDIA)
 			trace_cxl_general_media(cxlmd, type, cxlr, hpa,
 						&evt->gen_media);
@@ -1408,6 +1413,127 @@ int cxl_poison_state_init(struct cxl_memdev_state *mds)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, CXL);
 
+struct cxl_mce_record {
+	struct list_head node;
+	u64 hpa;
+};
+LIST_HEAD(cxl_mce_records);
+DEFINE_MUTEX(cxl_mce_mutex);
+
+bool cxl_mce_recorded(u64 hpa)
+{
+	struct cxl_mce_record *cur, *next, *rec;
+	int rc;
+
+	rc = mutex_lock_interruptible(&cxl_mce_mutex);
+	if (rc)
+		return false;
+
+	list_for_each_entry_safe(cur, next, &cxl_mce_records, node) {
+		if (cur->hpa == hpa) {
+			mutex_unlock(&cxl_mce_mutex);
+			return true;
+		}
+	}
+
+	rec = kmalloc(sizeof(struct cxl_mce_record), GFP_KERNEL);
+	rec->hpa = hpa;
+	list_add(&cxl_mce_records, &rec->node);
+
+	mutex_unlock(&cxl_mce_mutex);
+
+	return false;
+}
+
+void cxl_mce_clear(u64 hpa)
+{
+	struct cxl_mce_record *cur, *next;
+	int rc;
+
+	rc = mutex_lock_interruptible(&cxl_mce_mutex);
+	if (rc)
+		return;
+
+	list_for_each_entry_safe(cur, next, &cxl_mce_records, node) {
+		if (cur->hpa == hpa) {
+			list_del(&cur->node);
+			break;
+		}
+	}
+
+	mutex_unlock(&cxl_mce_mutex);
+}
+
+struct cxl_contains_hpa_context {
+	bool contains;
+	u64 hpa;
+};
+
+static int __cxl_contains_hpa(struct device *dev, void *arg)
+{
+	struct cxl_contains_hpa_context *ctx = arg;
+	struct cxl_endpoint_decoder *cxled;
+	struct range *range;
+	u64 hpa = ctx->hpa;
+
+	if (!is_endpoint_decoder(dev))
+		return 0;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	range = &cxled->cxld.hpa_range;
+
+	if (range->start <= hpa && hpa <= range->end) {
+		ctx->contains = true;
+		return 1;
+	}
+
+	return 0;
+}
+
+static bool cxl_contains_hpa(const struct cxl_memdev *cxlmd, u64 hpa)
+{
+	struct cxl_contains_hpa_context ctx = {
+		.contains = false,
+		.hpa = hpa,
+	};
+	struct cxl_port *port;
+
+	port = cxlmd->endpoint;
+	if (port && is_cxl_endpoint(port) && cxl_num_decoders_committed(port))
+		device_for_each_child(&port->dev, &ctx, __cxl_contains_hpa);
+
+	return ctx.contains;
+}
+
+static int cxl_handle_mce(struct notifier_block *nb, unsigned long val,
+			  void *data)
+{
+	struct mce *mce = (struct mce *)data;
+	struct cxl_memdev_state *mds = container_of(nb, struct cxl_memdev_state,
+						    mce_notifier);
+	u64 hpa;
+
+	if (!mce || !mce_usable_address(mce))
+		return NOTIFY_DONE;
+
+	hpa = mce->addr & MCI_ADDR_PHYSADDR;
+
+	/* Check if the PFN is located on this CXL device */
+	if (!pfn_valid(hpa >> PAGE_SHIFT) &&
+	    !cxl_contains_hpa(mds->cxlds.cxlmd, hpa))
+		return NOTIFY_DONE;
+
+	/*
+	 * Search PFN in the cxl_mce_records, if already exists, don't continue
+	 * to do memory_failure() to avoid a poison address being reported
+	 * more than once.
+	 */
+	if (cxl_mce_recorded(hpa))
+		return NOTIFY_STOP;
+	else
+		return NOTIFY_OK;
+}
+
 struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
 {
 	struct cxl_memdev_state *mds;
@@ -1427,6 +1553,10 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
 	mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
 	mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;
 
+	mds->mce_notifier.notifier_call = cxl_handle_mce;
+	mds->mce_notifier.priority = MCE_PRIO_CXL;
+	mce_register_decode_chain(&mds->mce_notifier);
+
 	return mds;
 }
 EXPORT_SYMBOL_NS_GPL(cxl_memdev_state_create, CXL);
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index 0277726afd04..aa3ac89d17be 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -376,10 +376,14 @@ int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
 		goto out;
 
 	cxlr = cxl_dpa_to_region(cxlmd, dpa);
-	if (cxlr)
+	if (cxlr) {
+		u64 hpa = cxl_trace_hpa(cxlr, cxlmd, dpa);
+
+		cxl_mce_clear(hpa);
 		dev_warn_once(mds->cxlds.dev,
 			      "poison clear dpa:%#llx region: %s\n", dpa,
 			      dev_name(&cxlr->dev));
+	}
 
 	record = (struct cxl_poison_record) {
 		.address = cpu_to_le64(dpa),
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 19aba81cdf13..fbf8d9f46984 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -501,6 +501,7 @@ struct cxl_memdev_state {
 	struct cxl_fw_state fw;
 
 	struct rcuwait mbox_wait;
+	struct notifier_block mce_notifier;
 	int (*mbox_send)(struct cxl_memdev_state *mds,
 			 struct cxl_mbox_cmd *cmd);
 };
@@ -836,6 +837,8 @@ int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
 int cxl_trigger_poison_list(struct cxl_memdev *cxlmd);
 int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa);
 int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa);
+bool cxl_mce_recorded(u64 pfn);
+void cxl_mce_clear(u64 pfn);
 
 #ifdef CONFIG_CXL_SUSPEND
 void cxl_mem_active_inc(void);
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2024-07-25  6:34 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-06-18 16:53 [RFC PATCH] cxl: avoid duplicating report from MCE & device Shiyang Ruan via
2024-06-18 23:35 ` Dave Jiang
2024-06-19  9:24   ` Shiyang Ruan via
2024-06-20 15:51     ` Dave Jiang
2024-06-21 10:18       ` Shiyang Ruan via
2024-06-20 17:02 ` Jonathan Cameron via
2024-06-21 10:16   ` Shiyang Ruan via
2024-06-21 17:21     ` Jonathan Cameron via
2024-06-21 17:59   ` Dan Williams
2024-06-21 18:45     ` Jonathan Cameron via
2024-06-21 20:44       ` Luck, Tony
2024-06-26  6:03         ` Shiyang Ruan via
2024-06-26 15:56           ` Luck, Tony
2024-06-21 17:51 ` Dan Williams
2024-06-25 13:56   ` Shiyang Ruan via
2024-07-02  2:12     ` Shiyang Ruan via
2024-07-19 16:04       ` Dave Jiang
2024-07-22  7:01         ` Shiyang Ruan via
2024-07-25  2:51           ` Yasunori Gotou (Fujitsu) via
2024-07-19  6:24 ` Shiyang Ruan via

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).