linux-cxl.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
@ 2025-07-21 17:04 Dave Jiang
  2025-07-22 10:57 ` Jonathan Cameron
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Dave Jiang @ 2025-07-21 17:04 UTC (permalink / raw)
  To: linux-cxl
  Cc: dave, jonathan.cameron, alison.schofield, vishal.l.verma,
	ira.weiny, dan.j.williams, Robert Richter, Terry Bowman

Remove from core/pci_aer.c and move the CONFIG_PCIEAER_CXL ifdef to
cxlpci.h to deal with the exported symbols as needed. There is enough
AER handling code (and more to come) to move the AER code to its own
C file.

Cc: Robert Richter <rrichter@amd.com>
Cc: Terry Bowman <terry.bowman@amd.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
v2:
- Address 0-day reprot of cxl_handle_rdport_errors symbol undefined
---
 drivers/cxl/core/Makefile  |   1 +
 drivers/cxl/core/core.h    |   9 ++
 drivers/cxl/core/pci.c     | 171 +------------------------------------
 drivers/cxl/core/pci_aer.c | 168 ++++++++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h          |   8 --
 drivers/cxl/cxlpci.h       |   8 ++
 tools/testing/cxl/Kbuild   |   1 +
 7 files changed, 189 insertions(+), 177 deletions(-)
 create mode 100644 drivers/cxl/core/pci_aer.c

diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 79e2ef81fde8..bcea856157af 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -21,3 +21,4 @@ cxl_core-$(CONFIG_CXL_REGION) += region.o
 cxl_core-$(CONFIG_CXL_MCE) += mce.o
 cxl_core-$(CONFIG_CXL_FEATURES) += features.o
 cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
+cxl_core-$(CONFIG_PCIEAER_CXL) += pci_aer.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 29b61828a847..96c319c66a48 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -136,4 +136,13 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
 		    u16 *return_code);
 #endif
 
+#ifdef CONFIG_PCIEAER_CXL
+void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
+#else
+static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) {}
+#endif
+
+void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
+bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
+
 #endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index b50551601c2e..bd78369bc3f4 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -664,8 +664,7 @@ void read_cdat_data(struct cxl_port *port)
 }
 EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
 
-static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
-				 void __iomem *ras_base)
+void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
 {
 	void __iomem *addr;
 	u32 status;
@@ -707,8 +706,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
  * Log the state of the RAS status registers and prepare them to log the
  * next error status. Return 1 if reset needed.
  */
-static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
-				  void __iomem *ras_base)
+bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
 {
 	u32 hl[CXL_HEADERLOG_SIZE_U32];
 	void __iomem *addr;
@@ -746,171 +744,6 @@ static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
 	return __cxl_handle_ras(cxlds, cxlds->regs.ras);
 }
 
-#ifdef CONFIG_PCIEAER_CXL
-
-static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
-{
-	resource_size_t aer_phys;
-	struct device *host;
-	u16 aer_cap;
-
-	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
-	if (aer_cap) {
-		host = dport->reg_map.host;
-		aer_phys = aer_cap + dport->rcrb.base;
-		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
-						sizeof(struct aer_capability_regs));
-	}
-}
-
-static void cxl_dport_map_ras(struct cxl_dport *dport)
-{
-	struct cxl_register_map *map = &dport->reg_map;
-	struct device *dev = dport->dport_dev;
-
-	if (!map->component_map.ras.valid)
-		dev_dbg(dev, "RAS registers not found\n");
-	else if (cxl_map_component_regs(map, &dport->regs.component,
-					BIT(CXL_CM_CAP_CAP_ID_RAS)))
-		dev_dbg(dev, "Failed to map RAS capability.\n");
-}
-
-static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
-{
-	void __iomem *aer_base = dport->regs.dport_aer;
-	u32 aer_cmd_mask, aer_cmd;
-
-	if (!aer_base)
-		return;
-
-	/*
-	 * Disable RCH root port command interrupts.
-	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
-	 *
-	 * This sequence may not be necessary. CXL spec states disabling
-	 * the root cmd register's interrupts is required. But, PCI spec
-	 * shows these are disabled by default on reset.
-	 */
-	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
-			PCI_ERR_ROOT_CMD_NONFATAL_EN |
-			PCI_ERR_ROOT_CMD_FATAL_EN);
-	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
-	aer_cmd &= ~aer_cmd_mask;
-	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
-}
-
-/**
- * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
- * @dport: the cxl_dport that needs to be initialized
- * @host: host device for devm operations
- */
-void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
-{
-	dport->reg_map.host = host;
-	cxl_dport_map_ras(dport);
-
-	if (dport->rch) {
-		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
-
-		if (!host_bridge->native_aer)
-			return;
-
-		cxl_dport_map_rch_aer(dport);
-		cxl_disable_rch_root_ints(dport);
-	}
-}
-EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
-
-static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
-					  struct cxl_dport *dport)
-{
-	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
-}
-
-static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
-				       struct cxl_dport *dport)
-{
-	return __cxl_handle_ras(cxlds, dport->regs.ras);
-}
-
-/*
- * Copy the AER capability registers using 32 bit read accesses.
- * This is necessary because RCRB AER capability is MMIO mapped. Clear the
- * status after copying.
- *
- * @aer_base: base address of AER capability block in RCRB
- * @aer_regs: destination for copying AER capability
- */
-static bool cxl_rch_get_aer_info(void __iomem *aer_base,
-				 struct aer_capability_regs *aer_regs)
-{
-	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
-	u32 *aer_regs_buf = (u32 *)aer_regs;
-	int n;
-
-	if (!aer_base)
-		return false;
-
-	/* Use readl() to guarantee 32-bit accesses */
-	for (n = 0; n < read_cnt; n++)
-		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
-
-	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
-	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
-
-	return true;
-}
-
-/* Get AER severity. Return false if there is no error. */
-static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
-				     int *severity)
-{
-	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
-		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
-			*severity = AER_FATAL;
-		else
-			*severity = AER_NONFATAL;
-		return true;
-	}
-
-	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
-		*severity = AER_CORRECTABLE;
-		return true;
-	}
-
-	return false;
-}
-
-static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
-{
-	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
-	struct aer_capability_regs aer_regs;
-	struct cxl_dport *dport;
-	int severity;
-
-	struct cxl_port *port __free(put_cxl_port) =
-		cxl_pci_find_port(pdev, &dport);
-	if (!port)
-		return;
-
-	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
-		return;
-
-	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
-		return;
-
-	pci_print_aer(pdev, severity, &aer_regs);
-
-	if (severity == AER_CORRECTABLE)
-		cxl_handle_rdport_cor_ras(cxlds, dport);
-	else
-		cxl_handle_rdport_ras(cxlds, dport);
-}
-
-#else
-static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
-#endif
-
 void cxl_cor_error_detected(struct pci_dev *pdev)
 {
 	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
diff --git a/drivers/cxl/core/pci_aer.c b/drivers/cxl/core/pci_aer.c
new file mode 100644
index 000000000000..cf1e9377752e
--- /dev/null
+++ b/drivers/cxl/core/pci_aer.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2025 AMD Corporation. All rights reserved. */
+/* Copyright(c) 2025 Intel Corporation. All rights reserved. */
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <cxlpci.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "core.h"
+
+static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
+{
+	resource_size_t aer_phys;
+	struct device *host;
+	u16 aer_cap;
+
+	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
+	if (aer_cap) {
+		host = dport->reg_map.host;
+		aer_phys = aer_cap + dport->rcrb.base;
+		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
+						sizeof(struct aer_capability_regs));
+	}
+}
+
+static void cxl_dport_map_ras(struct cxl_dport *dport)
+{
+	struct cxl_register_map *map = &dport->reg_map;
+	struct device *dev = dport->dport_dev;
+
+	if (!map->component_map.ras.valid)
+		dev_dbg(dev, "RAS registers not found\n");
+	else if (cxl_map_component_regs(map, &dport->regs.component,
+					BIT(CXL_CM_CAP_CAP_ID_RAS)))
+		dev_dbg(dev, "Failed to map RAS capability.\n");
+}
+
+static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
+{
+	void __iomem *aer_base = dport->regs.dport_aer;
+	u32 aer_cmd_mask, aer_cmd;
+
+	if (!aer_base)
+		return;
+
+	/*
+	 * Disable RCH root port command interrupts.
+	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
+	 *
+	 * This sequence may not be necessary. CXL spec states disabling
+	 * the root cmd register's interrupts is required. But, PCI spec
+	 * shows these are disabled by default on reset.
+	 */
+	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
+			PCI_ERR_ROOT_CMD_NONFATAL_EN |
+			PCI_ERR_ROOT_CMD_FATAL_EN);
+	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
+	aer_cmd &= ~aer_cmd_mask;
+	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
+}
+
+/**
+ * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
+ * @dport: the cxl_dport that needs to be initialized
+ * @host: host device for devm operations
+ */
+void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
+{
+	dport->reg_map.host = host;
+	cxl_dport_map_ras(dport);
+
+	if (dport->rch) {
+		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
+
+		if (!host_bridge->native_aer)
+			return;
+
+		cxl_dport_map_rch_aer(dport);
+		cxl_disable_rch_root_ints(dport);
+	}
+}
+EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
+
+static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
+					  struct cxl_dport *dport)
+{
+	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
+}
+
+static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
+				       struct cxl_dport *dport)
+{
+	return __cxl_handle_ras(cxlds, dport->regs.ras);
+}
+
+/*
+ * Copy the AER capability registers using 32 bit read accesses.
+ * This is necessary because RCRB AER capability is MMIO mapped. Clear the
+ * status after copying.
+ *
+ * @aer_base: base address of AER capability block in RCRB
+ * @aer_regs: destination for copying AER capability
+ */
+static bool cxl_rch_get_aer_info(void __iomem *aer_base,
+				 struct aer_capability_regs *aer_regs)
+{
+	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
+	u32 *aer_regs_buf = (u32 *)aer_regs;
+	int n;
+
+	if (!aer_base)
+		return false;
+
+	/* Use readl() to guarantee 32-bit accesses */
+	for (n = 0; n < read_cnt; n++)
+		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
+
+	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
+	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
+
+	return true;
+}
+
+/* Get AER severity. Return false if there is no error. */
+static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
+				     int *severity)
+{
+	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
+		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
+			*severity = AER_FATAL;
+		else
+			*severity = AER_NONFATAL;
+		return true;
+	}
+
+	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
+		*severity = AER_CORRECTABLE;
+		return true;
+	}
+
+	return false;
+}
+
+void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
+{
+	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+	struct aer_capability_regs aer_regs;
+	struct cxl_dport *dport;
+	int severity;
+
+	struct cxl_port *port __free(put_cxl_port) =
+		cxl_pci_find_port(pdev, &dport);
+	if (!port)
+		return;
+
+	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
+		return;
+
+	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
+		return;
+
+	pci_print_aer(pdev, severity, &aer_regs);
+
+	if (severity == AER_CORRECTABLE)
+		cxl_handle_rdport_cor_ras(cxlds, dport);
+	else
+		cxl_handle_rdport_ras(cxlds, dport);
+}
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 3f1695c96abc..73397da7f970 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -759,14 +759,6 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
 					 struct device *dport_dev, int port_id,
 					 resource_size_t rcrb);
 
-#ifdef CONFIG_PCIEAER_CXL
-void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
-void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
-#else
-static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
-						struct device *host) { }
-#endif
-
 struct cxl_decoder *to_cxl_decoder(struct device *dev);
 struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
 struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 54e219b0049e..5f73ce42a758 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -135,4 +135,12 @@ void read_cdat_data(struct cxl_port *port);
 void cxl_cor_error_detected(struct pci_dev *pdev);
 pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
 				    pci_channel_state_t state);
+
+#ifdef CONFIG_PCIEAER_CXL
+void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
+#else
+static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
+						struct device *host) { }
+#endif
+
 #endif /* __CXL_PCI_H__ */
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 31a2d73c963f..54b49d8e6b40 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -68,6 +68,7 @@ cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
 cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
 cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
 cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
+cxl_core-$(CONFIG_PCIEAER_CXL) += $(CXL_CORE_SRC)/pci_aer.o
 cxl_core-y += config_check.o
 cxl_core-y += cxl_core_test.o
 cxl_core-y += cxl_core_exports.o

base-commit: 347e9f5043c89695b01e66b3ed111755afcf1911
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
  2025-07-21 17:04 [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c Dave Jiang
@ 2025-07-22 10:57 ` Jonathan Cameron
  2025-07-22 14:08 ` Joshua Hahn
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Jonathan Cameron @ 2025-07-22 10:57 UTC (permalink / raw)
  To: Dave Jiang
  Cc: linux-cxl, dave, alison.schofield, vishal.l.verma, ira.weiny,
	dan.j.williams, Robert Richter, Terry Bowman

On Mon, 21 Jul 2025 10:04:15 -0700
Dave Jiang <dave.jiang@intel.com> wrote:

> Remove from core/pci_aer.c and move the CONFIG_PCIEAER_CXL ifdef to
> cxlpci.h to deal with the exported symbols as needed. There is enough
> AER handling code (and more to come) to move the AER code to its own
> C file.
> 
> Cc: Robert Richter <rrichter@amd.com>
> Cc: Terry Bowman <terry.bowman@amd.com>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
  2025-07-21 17:04 [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c Dave Jiang
  2025-07-22 10:57 ` Jonathan Cameron
@ 2025-07-22 14:08 ` Joshua Hahn
  2025-07-23 14:55 ` Bowman, Terry
  2025-07-23 21:35 ` Bowman, Terry
  3 siblings, 0 replies; 12+ messages in thread
From: Joshua Hahn @ 2025-07-22 14:08 UTC (permalink / raw)
  To: Dave Jiang
  Cc: linux-cxl, dave, jonathan.cameron, alison.schofield,
	vishal.l.verma, ira.weiny, dan.j.williams, Robert Richter,
	Terry Bowman

On Mon, 21 Jul 2025 10:04:15 -0700 Dave Jiang <dave.jiang@intel.com> wrote:

> Remove from core/pci_aer.c and move the CONFIG_PCIEAER_CXL ifdef to
> cxlpci.h to deal with the exported symbols as needed. There is enough
> AER handling code (and more to come) to move the AER code to its own
> C file.
> 
> Cc: Robert Richter <rrichter@amd.com>
> Cc: Terry Bowman <terry.bowman@amd.com>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
> v2:
> - Address 0-day reprot of cxl_handle_rdport_errors symbol undefined
> ---

Hello Dave, thank you for the patch! This makes a lot of sense to me and
it all looks good : -)

Feel free to add a
Reviewed-by: Joshua Hahn <joshua.hahnjy@gmail.com>

Have a great day!
Joshua

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
  2025-07-21 17:04 [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c Dave Jiang
  2025-07-22 10:57 ` Jonathan Cameron
  2025-07-22 14:08 ` Joshua Hahn
@ 2025-07-23 14:55 ` Bowman, Terry
  2025-07-23 15:14   ` Dave Jiang
  2025-07-23 21:35 ` Bowman, Terry
  3 siblings, 1 reply; 12+ messages in thread
From: Bowman, Terry @ 2025-07-23 14:55 UTC (permalink / raw)
  To: Dave Jiang, linux-cxl
  Cc: dave, jonathan.cameron, alison.schofield, vishal.l.verma,
	ira.weiny, dan.j.williams, Robert Richter

On 7/21/2025 12:04 PM, Dave Jiang wrote:
> Remove from core/pci_aer.c and move the CONFIG_PCIEAER_CXL ifdef to
> cxlpci.h to deal with the exported symbols as needed. There is enough
> AER handling code (and more to come) to move the AER code to its own
> C file.
> 
> Cc: Robert Richter <rrichter@amd.com>
> Cc: Terry Bowman <terry.bowman@amd.com>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
> v2:
> - Address 0-day reprot of cxl_handle_rdport_errors symbol undefined
> ---
>  drivers/cxl/core/Makefile  |   1 +
>  drivers/cxl/core/core.h    |   9 ++
>  drivers/cxl/core/pci.c     | 171 +------------------------------------
>  drivers/cxl/core/pci_aer.c | 168 ++++++++++++++++++++++++++++++++++++
>  drivers/cxl/cxl.h          |   8 --
>  drivers/cxl/cxlpci.h       |   8 ++
>  tools/testing/cxl/Kbuild   |   1 +
>  7 files changed, 189 insertions(+), 177 deletions(-)
>  create mode 100644 drivers/cxl/core/pci_aer.c
> 
> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> index 79e2ef81fde8..bcea856157af 100644
> --- a/drivers/cxl/core/Makefile
> +++ b/drivers/cxl/core/Makefile
> @@ -21,3 +21,4 @@ cxl_core-$(CONFIG_CXL_REGION) += region.o
>  cxl_core-$(CONFIG_CXL_MCE) += mce.o
>  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
> +cxl_core-$(CONFIG_PCIEAER_CXL) += pci_aer.o
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 29b61828a847..96c319c66a48 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -136,4 +136,13 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
>  		    u16 *return_code);
>  #endif
>  
> +#ifdef CONFIG_PCIEAER_CXL
> +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
> +#else
> +static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) {}
> +#endif
> +
> +void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
> +bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
> +
>  #endif /* __CXL_CORE_H__ */
> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
> index b50551601c2e..bd78369bc3f4 100644
> --- a/drivers/cxl/core/pci.c
> +++ b/drivers/cxl/core/pci.c
> @@ -664,8 +664,7 @@ void read_cdat_data(struct cxl_port *port)
>  }
>  EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
>  
> -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
> -				 void __iomem *ras_base)
> +void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
>  {
>  	void __iomem *addr;
>  	u32 status;
> @@ -707,8 +706,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
>   * Log the state of the RAS status registers and prepare them to log the
>   * next error status. Return 1 if reset needed.
>   */
> -static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
> -				  void __iomem *ras_base)
> +bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
>  {
>  	u32 hl[CXL_HEADERLOG_SIZE_U32];
>  	void __iomem *addr;
> @@ -746,171 +744,6 @@ static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
>  	return __cxl_handle_ras(cxlds, cxlds->regs.ras);
>  }
>  
> -#ifdef CONFIG_PCIEAER_CXL
> -
> -static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
> -{
> -	resource_size_t aer_phys;
> -	struct device *host;
> -	u16 aer_cap;
> -
> -	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
> -	if (aer_cap) {
> -		host = dport->reg_map.host;
> -		aer_phys = aer_cap + dport->rcrb.base;
> -		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
> -						sizeof(struct aer_capability_regs));
> -	}
> -}
> -
> -static void cxl_dport_map_ras(struct cxl_dport *dport)
> -{
> -	struct cxl_register_map *map = &dport->reg_map;
> -	struct device *dev = dport->dport_dev;
> -
> -	if (!map->component_map.ras.valid)
> -		dev_dbg(dev, "RAS registers not found\n");
> -	else if (cxl_map_component_regs(map, &dport->regs.component,
> -					BIT(CXL_CM_CAP_CAP_ID_RAS)))
> -		dev_dbg(dev, "Failed to map RAS capability.\n");
> -}
> -
> -static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
> -{
> -	void __iomem *aer_base = dport->regs.dport_aer;
> -	u32 aer_cmd_mask, aer_cmd;
> -
> -	if (!aer_base)
> -		return;
> -
> -	/*
> -	 * Disable RCH root port command interrupts.
> -	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
> -	 *
> -	 * This sequence may not be necessary. CXL spec states disabling
> -	 * the root cmd register's interrupts is required. But, PCI spec
> -	 * shows these are disabled by default on reset.
> -	 */
> -	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
> -			PCI_ERR_ROOT_CMD_NONFATAL_EN |
> -			PCI_ERR_ROOT_CMD_FATAL_EN);
> -	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
> -	aer_cmd &= ~aer_cmd_mask;
> -	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
> -}
> -
> -/**
> - * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
> - * @dport: the cxl_dport that needs to be initialized
> - * @host: host device for devm operations
> - */
> -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
> -{
> -	dport->reg_map.host = host;
> -	cxl_dport_map_ras(dport);
> -
> -	if (dport->rch) {
> -		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
> -
> -		if (!host_bridge->native_aer)
> -			return;
> -
> -		cxl_dport_map_rch_aer(dport);
> -		cxl_disable_rch_root_ints(dport);
> -	}
> -}
> -EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
> -
> -static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
> -					  struct cxl_dport *dport)
> -{
> -	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
> -}
> -
> -static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
> -				       struct cxl_dport *dport)
> -{
> -	return __cxl_handle_ras(cxlds, dport->regs.ras);
> -}
> -
> -/*
> - * Copy the AER capability registers using 32 bit read accesses.
> - * This is necessary because RCRB AER capability is MMIO mapped. Clear the
> - * status after copying.
> - *
> - * @aer_base: base address of AER capability block in RCRB
> - * @aer_regs: destination for copying AER capability
> - */
> -static bool cxl_rch_get_aer_info(void __iomem *aer_base,
> -				 struct aer_capability_regs *aer_regs)
> -{
> -	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
> -	u32 *aer_regs_buf = (u32 *)aer_regs;
> -	int n;
> -
> -	if (!aer_base)
> -		return false;
> -
> -	/* Use readl() to guarantee 32-bit accesses */
> -	for (n = 0; n < read_cnt; n++)
> -		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
> -
> -	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
> -	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
> -
> -	return true;
> -}
> -
> -/* Get AER severity. Return false if there is no error. */
> -static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
> -				     int *severity)
> -{
> -	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
> -		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
> -			*severity = AER_FATAL;
> -		else
> -			*severity = AER_NONFATAL;
> -		return true;
> -	}
> -
> -	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
> -		*severity = AER_CORRECTABLE;
> -		return true;
> -	}
> -
> -	return false;
> -}
> -
> -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
> -{
> -	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
> -	struct aer_capability_regs aer_regs;
> -	struct cxl_dport *dport;
> -	int severity;
> -
> -	struct cxl_port *port __free(put_cxl_port) =
> -		cxl_pci_find_port(pdev, &dport);
> -	if (!port)
> -		return;
> -
> -	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
> -		return;
> -
> -	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
> -		return;
> -
> -	pci_print_aer(pdev, severity, &aer_regs);
> -
> -	if (severity == AER_CORRECTABLE)
> -		cxl_handle_rdport_cor_ras(cxlds, dport);
> -	else
> -		cxl_handle_rdport_ras(cxlds, dport);
> -}
> -
> -#else
> -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
> -#endif
> -
>  void cxl_cor_error_detected(struct pci_dev *pdev)
>  {
>  	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
> diff --git a/drivers/cxl/core/pci_aer.c b/drivers/cxl/core/pci_aer.c
> new file mode 100644
> index 000000000000..cf1e9377752e
> --- /dev/null
> +++ b/drivers/cxl/core/pci_aer.c



Hi Dave,

While adding this into my series and looking closer, I believe that pci_aer.c filename does not accurately reflect its content. 
The only AER-related code pertains to CX1.1, which is now a corner case. The AER driver, not CXL driver, logs AER errors for 
all other scenarios. There is no AER processing in the CXL driver except for CXL1.1 dports.

Most of the code in this file is related to or will be directed towards CXL RAS. It's important to note that AER and 
RAS are distinct and separate registers, with CXL RAS only utilizing the AER interrupt notification and UIE/CIE. 

I am concerned we are conflating AER and RAS, which may be misleading for others. We have other instances of this 
confusion, such as in the cxl_aer_correctable_error() and cxl_aer_uncorrectable_error() functions, which log RAS instead of AER.

My series aims to clarify the distinction between AER and CXL errors, categorizing CXL errors as a separate error class. 
At the least, can we add a documentation block at the top to indicate this source file provides functionality for 
CXL RAS?

-Terry

> @@ -0,0 +1,168 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */
> +/* Copyright(c) 2025 Intel Corporation. All rights reserved. */
> +#include <linux/pci.h>
> +#include <linux/aer.h>
> +#include <cxlpci.h>
> +#include <cxlmem.h>
> +#include <cxl.h>
> +#include "core.h"
> +
> +static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
> +{
> +	resource_size_t aer_phys;
> +	struct device *host;
> +	u16 aer_cap;
> +
> +	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
> +	if (aer_cap) {
> +		host = dport->reg_map.host;
> +		aer_phys = aer_cap + dport->rcrb.base;
> +		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
> +						sizeof(struct aer_capability_regs));
> +	}
> +}
> +
> +static void cxl_dport_map_ras(struct cxl_dport *dport)
> +{
> +	struct cxl_register_map *map = &dport->reg_map;
> +	struct device *dev = dport->dport_dev;
> +
> +	if (!map->component_map.ras.valid)
> +		dev_dbg(dev, "RAS registers not found\n");
> +	else if (cxl_map_component_regs(map, &dport->regs.component,
> +					BIT(CXL_CM_CAP_CAP_ID_RAS)))
> +		dev_dbg(dev, "Failed to map RAS capability.\n");
> +}
> +
> +static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
> +{
> +	void __iomem *aer_base = dport->regs.dport_aer;
> +	u32 aer_cmd_mask, aer_cmd;
> +
> +	if (!aer_base)
> +		return;
> +
> +	/*
> +	 * Disable RCH root port command interrupts.
> +	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
> +	 *
> +	 * This sequence may not be necessary. CXL spec states disabling
> +	 * the root cmd register's interrupts is required. But, PCI spec
> +	 * shows these are disabled by default on reset.
> +	 */
> +	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
> +			PCI_ERR_ROOT_CMD_NONFATAL_EN |
> +			PCI_ERR_ROOT_CMD_FATAL_EN);
> +	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
> +	aer_cmd &= ~aer_cmd_mask;
> +	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
> +}
> +
> +/**
> + * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
> + * @dport: the cxl_dport that needs to be initialized
> + * @host: host device for devm operations
> + */
> +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
> +{
> +	dport->reg_map.host = host;
> +	cxl_dport_map_ras(dport);
> +
> +	if (dport->rch) {
> +		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
> +
> +		if (!host_bridge->native_aer)
> +			return;
> +
> +		cxl_dport_map_rch_aer(dport);
> +		cxl_disable_rch_root_ints(dport);
> +	}
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
> +
> +static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
> +					  struct cxl_dport *dport)
> +{
> +	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
> +}
> +
> +static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
> +				       struct cxl_dport *dport)
> +{
> +	return __cxl_handle_ras(cxlds, dport->regs.ras);
> +}
> +
> +/*
> + * Copy the AER capability registers using 32 bit read accesses.
> + * This is necessary because RCRB AER capability is MMIO mapped. Clear the
> + * status after copying.
> + *
> + * @aer_base: base address of AER capability block in RCRB
> + * @aer_regs: destination for copying AER capability
> + */
> +static bool cxl_rch_get_aer_info(void __iomem *aer_base,
> +				 struct aer_capability_regs *aer_regs)
> +{
> +	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
> +	u32 *aer_regs_buf = (u32 *)aer_regs;
> +	int n;
> +
> +	if (!aer_base)
> +		return false;
> +
> +	/* Use readl() to guarantee 32-bit accesses */
> +	for (n = 0; n < read_cnt; n++)
> +		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
> +
> +	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
> +	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
> +
> +	return true;
> +}
> +
> +/* Get AER severity. Return false if there is no error. */
> +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
> +				     int *severity)
> +{
> +	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
> +		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
> +			*severity = AER_FATAL;
> +		else
> +			*severity = AER_NONFATAL;
> +		return true;
> +	}
> +
> +	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
> +		*severity = AER_CORRECTABLE;
> +		return true;
> +	}
> +
> +	return false;
> +}
> +
> +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
> +{
> +	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
> +	struct aer_capability_regs aer_regs;
> +	struct cxl_dport *dport;
> +	int severity;
> +
> +	struct cxl_port *port __free(put_cxl_port) =
> +		cxl_pci_find_port(pdev, &dport);
> +	if (!port)
> +		return;
> +
> +	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
> +		return;
> +
> +	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
> +		return;
> +
> +	pci_print_aer(pdev, severity, &aer_regs);
> +
> +	if (severity == AER_CORRECTABLE)
> +		cxl_handle_rdport_cor_ras(cxlds, dport);
> +	else
> +		cxl_handle_rdport_ras(cxlds, dport);
> +}
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 3f1695c96abc..73397da7f970 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -759,14 +759,6 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
>  					 struct device *dport_dev, int port_id,
>  					 resource_size_t rcrb);
>  
> -#ifdef CONFIG_PCIEAER_CXL
> -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
> -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
> -#else
> -static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
> -						struct device *host) { }
> -#endif
> -
>  struct cxl_decoder *to_cxl_decoder(struct device *dev);
>  struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
>  struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
> diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
> index 54e219b0049e..5f73ce42a758 100644
> --- a/drivers/cxl/cxlpci.h
> +++ b/drivers/cxl/cxlpci.h
> @@ -135,4 +135,12 @@ void read_cdat_data(struct cxl_port *port);
>  void cxl_cor_error_detected(struct pci_dev *pdev);
>  pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
>  				    pci_channel_state_t state);
> +
> +#ifdef CONFIG_PCIEAER_CXL
> +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
> +#else
> +static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
> +						struct device *host) { }
> +#endif
> +
>  #endif /* __CXL_PCI_H__ */
> diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
> index 31a2d73c963f..54b49d8e6b40 100644
> --- a/tools/testing/cxl/Kbuild
> +++ b/tools/testing/cxl/Kbuild
> @@ -68,6 +68,7 @@ cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
>  cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
>  cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
> +cxl_core-$(CONFIG_PCIEAER_CXL) += $(CXL_CORE_SRC)/pci_aer.o
>  cxl_core-y += config_check.o
>  cxl_core-y += cxl_core_test.o
>  cxl_core-y += cxl_core_exports.o
> 
> base-commit: 347e9f5043c89695b01e66b3ed111755afcf1911


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
  2025-07-23 14:55 ` Bowman, Terry
@ 2025-07-23 15:14   ` Dave Jiang
  2025-07-23 16:38     ` Bowman, Terry
  0 siblings, 1 reply; 12+ messages in thread
From: Dave Jiang @ 2025-07-23 15:14 UTC (permalink / raw)
  To: Bowman, Terry, linux-cxl
  Cc: dave, jonathan.cameron, alison.schofield, vishal.l.verma,
	ira.weiny, dan.j.williams, Robert Richter



On 7/23/25 7:55 AM, Bowman, Terry wrote:
> On 7/21/2025 12:04 PM, Dave Jiang wrote:
>> Remove from core/pci_aer.c and move the CONFIG_PCIEAER_CXL ifdef to
>> cxlpci.h to deal with the exported symbols as needed. There is enough
>> AER handling code (and more to come) to move the AER code to its own
>> C file.
>>
>> Cc: Robert Richter <rrichter@amd.com>
>> Cc: Terry Bowman <terry.bowman@amd.com>
>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>> ---
>> v2:
>> - Address 0-day reprot of cxl_handle_rdport_errors symbol undefined
>> ---
>>  drivers/cxl/core/Makefile  |   1 +
>>  drivers/cxl/core/core.h    |   9 ++
>>  drivers/cxl/core/pci.c     | 171 +------------------------------------
>>  drivers/cxl/core/pci_aer.c | 168 ++++++++++++++++++++++++++++++++++++
>>  drivers/cxl/cxl.h          |   8 --
>>  drivers/cxl/cxlpci.h       |   8 ++
>>  tools/testing/cxl/Kbuild   |   1 +
>>  7 files changed, 189 insertions(+), 177 deletions(-)
>>  create mode 100644 drivers/cxl/core/pci_aer.c
>>
>> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
>> index 79e2ef81fde8..bcea856157af 100644
>> --- a/drivers/cxl/core/Makefile
>> +++ b/drivers/cxl/core/Makefile
>> @@ -21,3 +21,4 @@ cxl_core-$(CONFIG_CXL_REGION) += region.o
>>  cxl_core-$(CONFIG_CXL_MCE) += mce.o
>>  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
>>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
>> +cxl_core-$(CONFIG_PCIEAER_CXL) += pci_aer.o
>> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
>> index 29b61828a847..96c319c66a48 100644
>> --- a/drivers/cxl/core/core.h
>> +++ b/drivers/cxl/core/core.h
>> @@ -136,4 +136,13 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
>>  		    u16 *return_code);
>>  #endif
>>  
>> +#ifdef CONFIG_PCIEAER_CXL
>> +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
>> +#else
>> +static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) {}
>> +#endif
>> +
>> +void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
>> +bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
>> +
>>  #endif /* __CXL_CORE_H__ */
>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>> index b50551601c2e..bd78369bc3f4 100644
>> --- a/drivers/cxl/core/pci.c
>> +++ b/drivers/cxl/core/pci.c
>> @@ -664,8 +664,7 @@ void read_cdat_data(struct cxl_port *port)
>>  }
>>  EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
>>  
>> -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
>> -				 void __iomem *ras_base)
>> +void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
>>  {
>>  	void __iomem *addr;
>>  	u32 status;
>> @@ -707,8 +706,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
>>   * Log the state of the RAS status registers and prepare them to log the
>>   * next error status. Return 1 if reset needed.
>>   */
>> -static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
>> -				  void __iomem *ras_base)
>> +bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
>>  {
>>  	u32 hl[CXL_HEADERLOG_SIZE_U32];
>>  	void __iomem *addr;
>> @@ -746,171 +744,6 @@ static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
>>  	return __cxl_handle_ras(cxlds, cxlds->regs.ras);
>>  }
>>  
>> -#ifdef CONFIG_PCIEAER_CXL
>> -
>> -static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
>> -{
>> -	resource_size_t aer_phys;
>> -	struct device *host;
>> -	u16 aer_cap;
>> -
>> -	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
>> -	if (aer_cap) {
>> -		host = dport->reg_map.host;
>> -		aer_phys = aer_cap + dport->rcrb.base;
>> -		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
>> -						sizeof(struct aer_capability_regs));
>> -	}
>> -}
>> -
>> -static void cxl_dport_map_ras(struct cxl_dport *dport)
>> -{
>> -	struct cxl_register_map *map = &dport->reg_map;
>> -	struct device *dev = dport->dport_dev;
>> -
>> -	if (!map->component_map.ras.valid)
>> -		dev_dbg(dev, "RAS registers not found\n");
>> -	else if (cxl_map_component_regs(map, &dport->regs.component,
>> -					BIT(CXL_CM_CAP_CAP_ID_RAS)))
>> -		dev_dbg(dev, "Failed to map RAS capability.\n");
>> -}
>> -
>> -static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
>> -{
>> -	void __iomem *aer_base = dport->regs.dport_aer;
>> -	u32 aer_cmd_mask, aer_cmd;
>> -
>> -	if (!aer_base)
>> -		return;
>> -
>> -	/*
>> -	 * Disable RCH root port command interrupts.
>> -	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
>> -	 *
>> -	 * This sequence may not be necessary. CXL spec states disabling
>> -	 * the root cmd register's interrupts is required. But, PCI spec
>> -	 * shows these are disabled by default on reset.
>> -	 */
>> -	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
>> -			PCI_ERR_ROOT_CMD_NONFATAL_EN |
>> -			PCI_ERR_ROOT_CMD_FATAL_EN);
>> -	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
>> -	aer_cmd &= ~aer_cmd_mask;
>> -	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
>> -}
>> -
>> -/**
>> - * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
>> - * @dport: the cxl_dport that needs to be initialized
>> - * @host: host device for devm operations
>> - */
>> -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
>> -{
>> -	dport->reg_map.host = host;
>> -	cxl_dport_map_ras(dport);
>> -
>> -	if (dport->rch) {
>> -		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
>> -
>> -		if (!host_bridge->native_aer)
>> -			return;
>> -
>> -		cxl_dport_map_rch_aer(dport);
>> -		cxl_disable_rch_root_ints(dport);
>> -	}
>> -}
>> -EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
>> -
>> -static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
>> -					  struct cxl_dport *dport)
>> -{
>> -	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
>> -}
>> -
>> -static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
>> -				       struct cxl_dport *dport)
>> -{
>> -	return __cxl_handle_ras(cxlds, dport->regs.ras);
>> -}
>> -
>> -/*
>> - * Copy the AER capability registers using 32 bit read accesses.
>> - * This is necessary because RCRB AER capability is MMIO mapped. Clear the
>> - * status after copying.
>> - *
>> - * @aer_base: base address of AER capability block in RCRB
>> - * @aer_regs: destination for copying AER capability
>> - */
>> -static bool cxl_rch_get_aer_info(void __iomem *aer_base,
>> -				 struct aer_capability_regs *aer_regs)
>> -{
>> -	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
>> -	u32 *aer_regs_buf = (u32 *)aer_regs;
>> -	int n;
>> -
>> -	if (!aer_base)
>> -		return false;
>> -
>> -	/* Use readl() to guarantee 32-bit accesses */
>> -	for (n = 0; n < read_cnt; n++)
>> -		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
>> -
>> -	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
>> -	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
>> -
>> -	return true;
>> -}
>> -
>> -/* Get AER severity. Return false if there is no error. */
>> -static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
>> -				     int *severity)
>> -{
>> -	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
>> -		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
>> -			*severity = AER_FATAL;
>> -		else
>> -			*severity = AER_NONFATAL;
>> -		return true;
>> -	}
>> -
>> -	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
>> -		*severity = AER_CORRECTABLE;
>> -		return true;
>> -	}
>> -
>> -	return false;
>> -}
>> -
>> -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
>> -{
>> -	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
>> -	struct aer_capability_regs aer_regs;
>> -	struct cxl_dport *dport;
>> -	int severity;
>> -
>> -	struct cxl_port *port __free(put_cxl_port) =
>> -		cxl_pci_find_port(pdev, &dport);
>> -	if (!port)
>> -		return;
>> -
>> -	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
>> -		return;
>> -
>> -	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
>> -		return;
>> -
>> -	pci_print_aer(pdev, severity, &aer_regs);
>> -
>> -	if (severity == AER_CORRECTABLE)
>> -		cxl_handle_rdport_cor_ras(cxlds, dport);
>> -	else
>> -		cxl_handle_rdport_ras(cxlds, dport);
>> -}
>> -
>> -#else
>> -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
>> -#endif
>> -
>>  void cxl_cor_error_detected(struct pci_dev *pdev)
>>  {
>>  	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
>> diff --git a/drivers/cxl/core/pci_aer.c b/drivers/cxl/core/pci_aer.c
>> new file mode 100644
>> index 000000000000..cf1e9377752e
>> --- /dev/null
>> +++ b/drivers/cxl/core/pci_aer.c
> 
> 
> 
> Hi Dave,
> 
> While adding this into my series and looking closer, I believe that pci_aer.c filename does not accurately reflect its content. 
> The only AER-related code pertains to CX1.1, which is now a corner case. The AER driver, not CXL driver, logs AER errors for 
> all other scenarios. There is no AER processing in the CXL driver except for CXL1.1 dports.
> 
> Most of the code in this file is related to or will be directed towards CXL RAS. It's important to note that AER and 
> RAS are distinct and separate registers, with CXL RAS only utilizing the AER interrupt notification and UIE/CIE. 
> 
> I am concerned we are conflating AER and RAS, which may be misleading for others. We have other instances of this 
> confusion, such as in the cxl_aer_correctable_error() and cxl_aer_uncorrectable_error() functions, which log RAS instead of AER.
> 
> My series aims to clarify the distinction between AER and CXL errors, categorizing CXL errors as a separate error class. 
> At the least, can we add a documentation block at the top to indicate this source file provides functionality for 
> CXL RAS?

Sure we can do that. Also, we have a core/ras.c already. Any suggestions on how do we want improve this? Move things to ras.c? Rename cxl_aer.c to something else? Rename current ras.c to something else?

> 
> -Terry
> 
>> @@ -0,0 +1,168 @@
>> +// SPDX-License-Identifier: GPL-2.0-only
>> +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */
>> +/* Copyright(c) 2025 Intel Corporation. All rights reserved. */
>> +#include <linux/pci.h>
>> +#include <linux/aer.h>
>> +#include <cxlpci.h>
>> +#include <cxlmem.h>
>> +#include <cxl.h>
>> +#include "core.h"
>> +
>> +static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
>> +{
>> +	resource_size_t aer_phys;
>> +	struct device *host;
>> +	u16 aer_cap;
>> +
>> +	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
>> +	if (aer_cap) {
>> +		host = dport->reg_map.host;
>> +		aer_phys = aer_cap + dport->rcrb.base;
>> +		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
>> +						sizeof(struct aer_capability_regs));
>> +	}
>> +}
>> +
>> +static void cxl_dport_map_ras(struct cxl_dport *dport)
>> +{
>> +	struct cxl_register_map *map = &dport->reg_map;
>> +	struct device *dev = dport->dport_dev;
>> +
>> +	if (!map->component_map.ras.valid)
>> +		dev_dbg(dev, "RAS registers not found\n");
>> +	else if (cxl_map_component_regs(map, &dport->regs.component,
>> +					BIT(CXL_CM_CAP_CAP_ID_RAS)))
>> +		dev_dbg(dev, "Failed to map RAS capability.\n");
>> +}
>> +
>> +static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
>> +{
>> +	void __iomem *aer_base = dport->regs.dport_aer;
>> +	u32 aer_cmd_mask, aer_cmd;
>> +
>> +	if (!aer_base)
>> +		return;
>> +
>> +	/*
>> +	 * Disable RCH root port command interrupts.
>> +	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
>> +	 *
>> +	 * This sequence may not be necessary. CXL spec states disabling
>> +	 * the root cmd register's interrupts is required. But, PCI spec
>> +	 * shows these are disabled by default on reset.
>> +	 */
>> +	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
>> +			PCI_ERR_ROOT_CMD_NONFATAL_EN |
>> +			PCI_ERR_ROOT_CMD_FATAL_EN);
>> +	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
>> +	aer_cmd &= ~aer_cmd_mask;
>> +	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
>> +}
>> +
>> +/**
>> + * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
>> + * @dport: the cxl_dport that needs to be initialized
>> + * @host: host device for devm operations
>> + */
>> +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
>> +{
>> +	dport->reg_map.host = host;
>> +	cxl_dport_map_ras(dport);
>> +
>> +	if (dport->rch) {
>> +		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
>> +
>> +		if (!host_bridge->native_aer)
>> +			return;
>> +
>> +		cxl_dport_map_rch_aer(dport);
>> +		cxl_disable_rch_root_ints(dport);
>> +	}
>> +}
>> +EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
>> +
>> +static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
>> +					  struct cxl_dport *dport)
>> +{
>> +	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
>> +}
>> +
>> +static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
>> +				       struct cxl_dport *dport)
>> +{
>> +	return __cxl_handle_ras(cxlds, dport->regs.ras);
>> +}
>> +
>> +/*
>> + * Copy the AER capability registers using 32 bit read accesses.
>> + * This is necessary because RCRB AER capability is MMIO mapped. Clear the
>> + * status after copying.
>> + *
>> + * @aer_base: base address of AER capability block in RCRB
>> + * @aer_regs: destination for copying AER capability
>> + */
>> +static bool cxl_rch_get_aer_info(void __iomem *aer_base,
>> +				 struct aer_capability_regs *aer_regs)
>> +{
>> +	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
>> +	u32 *aer_regs_buf = (u32 *)aer_regs;
>> +	int n;
>> +
>> +	if (!aer_base)
>> +		return false;
>> +
>> +	/* Use readl() to guarantee 32-bit accesses */
>> +	for (n = 0; n < read_cnt; n++)
>> +		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
>> +
>> +	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
>> +	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
>> +
>> +	return true;
>> +}
>> +
>> +/* Get AER severity. Return false if there is no error. */
>> +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
>> +				     int *severity)
>> +{
>> +	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
>> +		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
>> +			*severity = AER_FATAL;
>> +		else
>> +			*severity = AER_NONFATAL;
>> +		return true;
>> +	}
>> +
>> +	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
>> +		*severity = AER_CORRECTABLE;
>> +		return true;
>> +	}
>> +
>> +	return false;
>> +}
>> +
>> +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
>> +{
>> +	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
>> +	struct aer_capability_regs aer_regs;
>> +	struct cxl_dport *dport;
>> +	int severity;
>> +
>> +	struct cxl_port *port __free(put_cxl_port) =
>> +		cxl_pci_find_port(pdev, &dport);
>> +	if (!port)
>> +		return;
>> +
>> +	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
>> +		return;
>> +
>> +	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
>> +		return;
>> +
>> +	pci_print_aer(pdev, severity, &aer_regs);
>> +
>> +	if (severity == AER_CORRECTABLE)
>> +		cxl_handle_rdport_cor_ras(cxlds, dport);
>> +	else
>> +		cxl_handle_rdport_ras(cxlds, dport);
>> +}
>> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
>> index 3f1695c96abc..73397da7f970 100644
>> --- a/drivers/cxl/cxl.h
>> +++ b/drivers/cxl/cxl.h
>> @@ -759,14 +759,6 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
>>  					 struct device *dport_dev, int port_id,
>>  					 resource_size_t rcrb);
>>  
>> -#ifdef CONFIG_PCIEAER_CXL
>> -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
>> -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
>> -#else
>> -static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
>> -						struct device *host) { }
>> -#endif
>> -
>>  struct cxl_decoder *to_cxl_decoder(struct device *dev);
>>  struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
>>  struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
>> diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
>> index 54e219b0049e..5f73ce42a758 100644
>> --- a/drivers/cxl/cxlpci.h
>> +++ b/drivers/cxl/cxlpci.h
>> @@ -135,4 +135,12 @@ void read_cdat_data(struct cxl_port *port);
>>  void cxl_cor_error_detected(struct pci_dev *pdev);
>>  pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
>>  				    pci_channel_state_t state);
>> +
>> +#ifdef CONFIG_PCIEAER_CXL
>> +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
>> +#else
>> +static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
>> +						struct device *host) { }
>> +#endif
>> +
>>  #endif /* __CXL_PCI_H__ */
>> diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
>> index 31a2d73c963f..54b49d8e6b40 100644
>> --- a/tools/testing/cxl/Kbuild
>> +++ b/tools/testing/cxl/Kbuild
>> @@ -68,6 +68,7 @@ cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
>>  cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
>>  cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
>>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
>> +cxl_core-$(CONFIG_PCIEAER_CXL) += $(CXL_CORE_SRC)/pci_aer.o
>>  cxl_core-y += config_check.o
>>  cxl_core-y += cxl_core_test.o
>>  cxl_core-y += cxl_core_exports.o
>>
>> base-commit: 347e9f5043c89695b01e66b3ed111755afcf1911
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
  2025-07-23 15:14   ` Dave Jiang
@ 2025-07-23 16:38     ` Bowman, Terry
  2025-07-23 16:43       ` Dave Jiang
  0 siblings, 1 reply; 12+ messages in thread
From: Bowman, Terry @ 2025-07-23 16:38 UTC (permalink / raw)
  To: Dave Jiang, linux-cxl
  Cc: dave, jonathan.cameron, alison.schofield, vishal.l.verma,
	ira.weiny, dan.j.williams, Robert Richter

On 7/23/2025 10:14 AM, Dave Jiang wrote:
> 
> 
> On 7/23/25 7:55 AM, Bowman, Terry wrote:
>> On 7/21/2025 12:04 PM, Dave Jiang wrote:
>>> Remove from core/pci_aer.c and move the CONFIG_PCIEAER_CXL ifdef to
>>> cxlpci.h to deal with the exported symbols as needed. There is enough
>>> AER handling code (and more to come) to move the AER code to its own
>>> C file.
>>>
>>> Cc: Robert Richter <rrichter@amd.com>
>>> Cc: Terry Bowman <terry.bowman@amd.com>
>>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>>> ---
>>> v2:
>>> - Address 0-day reprot of cxl_handle_rdport_errors symbol undefined
>>> ---
>>>  drivers/cxl/core/Makefile  |   1 +
>>>  drivers/cxl/core/core.h    |   9 ++
>>>  drivers/cxl/core/pci.c     | 171 +------------------------------------
>>>  drivers/cxl/core/pci_aer.c | 168 ++++++++++++++++++++++++++++++++++++
>>>  drivers/cxl/cxl.h          |   8 --
>>>  drivers/cxl/cxlpci.h       |   8 ++
>>>  tools/testing/cxl/Kbuild   |   1 +
>>>  7 files changed, 189 insertions(+), 177 deletions(-)
>>>  create mode 100644 drivers/cxl/core/pci_aer.c
>>>
>>> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
>>> index 79e2ef81fde8..bcea856157af 100644
>>> --- a/drivers/cxl/core/Makefile
>>> +++ b/drivers/cxl/core/Makefile
>>> @@ -21,3 +21,4 @@ cxl_core-$(CONFIG_CXL_REGION) += region.o
>>>  cxl_core-$(CONFIG_CXL_MCE) += mce.o
>>>  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
>>>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
>>> +cxl_core-$(CONFIG_PCIEAER_CXL) += pci_aer.o
>>> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
>>> index 29b61828a847..96c319c66a48 100644
>>> --- a/drivers/cxl/core/core.h
>>> +++ b/drivers/cxl/core/core.h
>>> @@ -136,4 +136,13 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
>>>  		    u16 *return_code);
>>>  #endif
>>>  
>>> +#ifdef CONFIG_PCIEAER_CXL
>>> +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
>>> +#else
>>> +static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) {}
>>> +#endif
>>> +
>>> +void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
>>> +bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
>>> +
>>>  #endif /* __CXL_CORE_H__ */
>>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>>> index b50551601c2e..bd78369bc3f4 100644
>>> --- a/drivers/cxl/core/pci.c
>>> +++ b/drivers/cxl/core/pci.c
>>> @@ -664,8 +664,7 @@ void read_cdat_data(struct cxl_port *port)
>>>  }
>>>  EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
>>>  
>>> -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
>>> -				 void __iomem *ras_base)
>>> +void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
>>>  {
>>>  	void __iomem *addr;
>>>  	u32 status;
>>> @@ -707,8 +706,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
>>>   * Log the state of the RAS status registers and prepare them to log the
>>>   * next error status. Return 1 if reset needed.
>>>   */
>>> -static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
>>> -				  void __iomem *ras_base)
>>> +bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
>>>  {
>>>  	u32 hl[CXL_HEADERLOG_SIZE_U32];
>>>  	void __iomem *addr;
>>> @@ -746,171 +744,6 @@ static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
>>>  	return __cxl_handle_ras(cxlds, cxlds->regs.ras);
>>>  }
>>>  
>>> -#ifdef CONFIG_PCIEAER_CXL
>>> -
>>> -static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
>>> -{
>>> -	resource_size_t aer_phys;
>>> -	struct device *host;
>>> -	u16 aer_cap;
>>> -
>>> -	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
>>> -	if (aer_cap) {
>>> -		host = dport->reg_map.host;
>>> -		aer_phys = aer_cap + dport->rcrb.base;
>>> -		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
>>> -						sizeof(struct aer_capability_regs));
>>> -	}
>>> -}
>>> -
>>> -static void cxl_dport_map_ras(struct cxl_dport *dport)
>>> -{
>>> -	struct cxl_register_map *map = &dport->reg_map;
>>> -	struct device *dev = dport->dport_dev;
>>> -
>>> -	if (!map->component_map.ras.valid)
>>> -		dev_dbg(dev, "RAS registers not found\n");
>>> -	else if (cxl_map_component_regs(map, &dport->regs.component,
>>> -					BIT(CXL_CM_CAP_CAP_ID_RAS)))
>>> -		dev_dbg(dev, "Failed to map RAS capability.\n");
>>> -}
>>> -
>>> -static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
>>> -{
>>> -	void __iomem *aer_base = dport->regs.dport_aer;
>>> -	u32 aer_cmd_mask, aer_cmd;
>>> -
>>> -	if (!aer_base)
>>> -		return;
>>> -
>>> -	/*
>>> -	 * Disable RCH root port command interrupts.
>>> -	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
>>> -	 *
>>> -	 * This sequence may not be necessary. CXL spec states disabling
>>> -	 * the root cmd register's interrupts is required. But, PCI spec
>>> -	 * shows these are disabled by default on reset.
>>> -	 */
>>> -	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
>>> -			PCI_ERR_ROOT_CMD_NONFATAL_EN |
>>> -			PCI_ERR_ROOT_CMD_FATAL_EN);
>>> -	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
>>> -	aer_cmd &= ~aer_cmd_mask;
>>> -	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
>>> -}
>>> -
>>> -/**
>>> - * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
>>> - * @dport: the cxl_dport that needs to be initialized
>>> - * @host: host device for devm operations
>>> - */
>>> -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
>>> -{
>>> -	dport->reg_map.host = host;
>>> -	cxl_dport_map_ras(dport);
>>> -
>>> -	if (dport->rch) {
>>> -		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
>>> -
>>> -		if (!host_bridge->native_aer)
>>> -			return;
>>> -
>>> -		cxl_dport_map_rch_aer(dport);
>>> -		cxl_disable_rch_root_ints(dport);
>>> -	}
>>> -}
>>> -EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
>>> -
>>> -static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
>>> -					  struct cxl_dport *dport)
>>> -{
>>> -	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
>>> -}
>>> -
>>> -static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
>>> -				       struct cxl_dport *dport)
>>> -{
>>> -	return __cxl_handle_ras(cxlds, dport->regs.ras);
>>> -}
>>> -
>>> -/*
>>> - * Copy the AER capability registers using 32 bit read accesses.
>>> - * This is necessary because RCRB AER capability is MMIO mapped. Clear the
>>> - * status after copying.
>>> - *
>>> - * @aer_base: base address of AER capability block in RCRB
>>> - * @aer_regs: destination for copying AER capability
>>> - */
>>> -static bool cxl_rch_get_aer_info(void __iomem *aer_base,
>>> -				 struct aer_capability_regs *aer_regs)
>>> -{
>>> -	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
>>> -	u32 *aer_regs_buf = (u32 *)aer_regs;
>>> -	int n;
>>> -
>>> -	if (!aer_base)
>>> -		return false;
>>> -
>>> -	/* Use readl() to guarantee 32-bit accesses */
>>> -	for (n = 0; n < read_cnt; n++)
>>> -		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
>>> -
>>> -	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
>>> -	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
>>> -
>>> -	return true;
>>> -}
>>> -
>>> -/* Get AER severity. Return false if there is no error. */
>>> -static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
>>> -				     int *severity)
>>> -{
>>> -	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
>>> -		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
>>> -			*severity = AER_FATAL;
>>> -		else
>>> -			*severity = AER_NONFATAL;
>>> -		return true;
>>> -	}
>>> -
>>> -	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
>>> -		*severity = AER_CORRECTABLE;
>>> -		return true;
>>> -	}
>>> -
>>> -	return false;
>>> -}
>>> -
>>> -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
>>> -{
>>> -	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
>>> -	struct aer_capability_regs aer_regs;
>>> -	struct cxl_dport *dport;
>>> -	int severity;
>>> -
>>> -	struct cxl_port *port __free(put_cxl_port) =
>>> -		cxl_pci_find_port(pdev, &dport);
>>> -	if (!port)
>>> -		return;
>>> -
>>> -	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
>>> -		return;
>>> -
>>> -	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
>>> -		return;
>>> -
>>> -	pci_print_aer(pdev, severity, &aer_regs);
>>> -
>>> -	if (severity == AER_CORRECTABLE)
>>> -		cxl_handle_rdport_cor_ras(cxlds, dport);
>>> -	else
>>> -		cxl_handle_rdport_ras(cxlds, dport);
>>> -}
>>> -
>>> -#else
>>> -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
>>> -#endif
>>> -
>>>  void cxl_cor_error_detected(struct pci_dev *pdev)
>>>  {
>>>  	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
>>> diff --git a/drivers/cxl/core/pci_aer.c b/drivers/cxl/core/pci_aer.c
>>> new file mode 100644
>>> index 000000000000..cf1e9377752e
>>> --- /dev/null
>>> +++ b/drivers/cxl/core/pci_aer.c
>>
>>
>>
>> Hi Dave,
>>
>> While adding this into my series and looking closer, I believe that pci_aer.c filename does not accurately reflect its content. 
>> The only AER-related code pertains to CX1.1, which is now a corner case. The AER driver, not CXL driver, logs AER errors for 
>> all other scenarios. There is no AER processing in the CXL driver except for CXL1.1 dports.
>>
>> Most of the code in this file is related to or will be directed towards CXL RAS. It's important to note that AER and 
>> RAS are distinct and separate registers, with CXL RAS only utilizing the AER interrupt notification and UIE/CIE. 
>>
>> I am concerned we are conflating AER and RAS, which may be misleading for others. We have other instances of this 
>> confusion, such as in the cxl_aer_correctable_error() and cxl_aer_uncorrectable_error() functions, which log RAS instead of AER.
>>
>> My series aims to clarify the distinction between AER and CXL errors, categorizing CXL errors as a separate error class. 
>> At the least, can we add a documentation block at the top to indicate this source file provides functionality for 
>> CXL RAS?
> 
> Sure we can do that. Also, we have a core/ras.c already. Any suggestions on how do we want improve this? Move things to ras.c? Rename cxl_aer.c to something else? Rename current ras.c to something else?
> 
>>

I don't think we want to combine the os-first and fw-first code in a single file. The separation leveraging the 
Makefile conditional compile is good because fw-first doesn't use PCIEAER_CXL while os-first does.
I think core/ras.c is good as-is.

cxl_aer.c is in the drivers/pci/pcie directory and is logically part of the AER service driver. This file includes the 
kfifo and logic for offloading CXL RAS errors to the CXL driver. I think the cxl_aer.c naming is fine but 
understand it could be argued to use cxl_ras.c instead. My concern was in the CXL driver AER-RAS naming but 
the same rules could be applied here as well. 

To improve the original situation mentioned I recommend we use 'ras' in what is named pci_aer.c here? 
Maybe 'pci_ras.c' instead? It's not perfect but would be more precise. This feels somewhat nitpicky on my part 
and will not be an issue if we leave as-is. Thanks for considering.

-Terry

>> -Terry
>>
>>> @@ -0,0 +1,168 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */
>>> +/* Copyright(c) 2025 Intel Corporation. All rights reserved. */
>>> +#include <linux/pci.h>
>>> +#include <linux/aer.h>
>>> +#include <cxlpci.h>
>>> +#include <cxlmem.h>
>>> +#include <cxl.h>
>>> +#include "core.h"
>>> +
>>> +static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
>>> +{
>>> +	resource_size_t aer_phys;
>>> +	struct device *host;
>>> +	u16 aer_cap;
>>> +
>>> +	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
>>> +	if (aer_cap) {
>>> +		host = dport->reg_map.host;
>>> +		aer_phys = aer_cap + dport->rcrb.base;
>>> +		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
>>> +						sizeof(struct aer_capability_regs));
>>> +	}
>>> +}
>>> +
>>> +static void cxl_dport_map_ras(struct cxl_dport *dport)
>>> +{
>>> +	struct cxl_register_map *map = &dport->reg_map;
>>> +	struct device *dev = dport->dport_dev;
>>> +
>>> +	if (!map->component_map.ras.valid)
>>> +		dev_dbg(dev, "RAS registers not found\n");
>>> +	else if (cxl_map_component_regs(map, &dport->regs.component,
>>> +					BIT(CXL_CM_CAP_CAP_ID_RAS)))
>>> +		dev_dbg(dev, "Failed to map RAS capability.\n");
>>> +}
>>> +
>>> +static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
>>> +{
>>> +	void __iomem *aer_base = dport->regs.dport_aer;
>>> +	u32 aer_cmd_mask, aer_cmd;
>>> +
>>> +	if (!aer_base)
>>> +		return;
>>> +
>>> +	/*
>>> +	 * Disable RCH root port command interrupts.
>>> +	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
>>> +	 *
>>> +	 * This sequence may not be necessary. CXL spec states disabling
>>> +	 * the root cmd register's interrupts is required. But, PCI spec
>>> +	 * shows these are disabled by default on reset.
>>> +	 */
>>> +	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
>>> +			PCI_ERR_ROOT_CMD_NONFATAL_EN |
>>> +			PCI_ERR_ROOT_CMD_FATAL_EN);
>>> +	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
>>> +	aer_cmd &= ~aer_cmd_mask;
>>> +	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
>>> +}
>>> +
>>> +/**
>>> + * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
>>> + * @dport: the cxl_dport that needs to be initialized
>>> + * @host: host device for devm operations
>>> + */
>>> +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
>>> +{
>>> +	dport->reg_map.host = host;
>>> +	cxl_dport_map_ras(dport);
>>> +
>>> +	if (dport->rch) {
>>> +		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
>>> +
>>> +		if (!host_bridge->native_aer)
>>> +			return;
>>> +
>>> +		cxl_dport_map_rch_aer(dport);
>>> +		cxl_disable_rch_root_ints(dport);
>>> +	}
>>> +}
>>> +EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
>>> +
>>> +static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
>>> +					  struct cxl_dport *dport)
>>> +{
>>> +	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
>>> +}
>>> +
>>> +static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
>>> +				       struct cxl_dport *dport)
>>> +{
>>> +	return __cxl_handle_ras(cxlds, dport->regs.ras);
>>> +}
>>> +
>>> +/*
>>> + * Copy the AER capability registers using 32 bit read accesses.
>>> + * This is necessary because RCRB AER capability is MMIO mapped. Clear the
>>> + * status after copying.
>>> + *
>>> + * @aer_base: base address of AER capability block in RCRB
>>> + * @aer_regs: destination for copying AER capability
>>> + */
>>> +static bool cxl_rch_get_aer_info(void __iomem *aer_base,
>>> +				 struct aer_capability_regs *aer_regs)
>>> +{
>>> +	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
>>> +	u32 *aer_regs_buf = (u32 *)aer_regs;
>>> +	int n;
>>> +
>>> +	if (!aer_base)
>>> +		return false;
>>> +
>>> +	/* Use readl() to guarantee 32-bit accesses */
>>> +	for (n = 0; n < read_cnt; n++)
>>> +		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
>>> +
>>> +	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
>>> +	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
>>> +
>>> +	return true;
>>> +}
>>> +
>>> +/* Get AER severity. Return false if there is no error. */
>>> +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
>>> +				     int *severity)
>>> +{
>>> +	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
>>> +		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
>>> +			*severity = AER_FATAL;
>>> +		else
>>> +			*severity = AER_NONFATAL;
>>> +		return true;
>>> +	}
>>> +
>>> +	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
>>> +		*severity = AER_CORRECTABLE;
>>> +		return true;
>>> +	}
>>> +
>>> +	return false;
>>> +}
>>> +
>>> +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
>>> +{
>>> +	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
>>> +	struct aer_capability_regs aer_regs;
>>> +	struct cxl_dport *dport;
>>> +	int severity;
>>> +
>>> +	struct cxl_port *port __free(put_cxl_port) =
>>> +		cxl_pci_find_port(pdev, &dport);
>>> +	if (!port)
>>> +		return;
>>> +
>>> +	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
>>> +		return;
>>> +
>>> +	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
>>> +		return;
>>> +
>>> +	pci_print_aer(pdev, severity, &aer_regs);
>>> +
>>> +	if (severity == AER_CORRECTABLE)
>>> +		cxl_handle_rdport_cor_ras(cxlds, dport);
>>> +	else
>>> +		cxl_handle_rdport_ras(cxlds, dport);
>>> +}
>>> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
>>> index 3f1695c96abc..73397da7f970 100644
>>> --- a/drivers/cxl/cxl.h
>>> +++ b/drivers/cxl/cxl.h
>>> @@ -759,14 +759,6 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
>>>  					 struct device *dport_dev, int port_id,
>>>  					 resource_size_t rcrb);
>>>  
>>> -#ifdef CONFIG_PCIEAER_CXL
>>> -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
>>> -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
>>> -#else
>>> -static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
>>> -						struct device *host) { }
>>> -#endif
>>> -
>>>  struct cxl_decoder *to_cxl_decoder(struct device *dev);
>>>  struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
>>>  struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
>>> diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
>>> index 54e219b0049e..5f73ce42a758 100644
>>> --- a/drivers/cxl/cxlpci.h
>>> +++ b/drivers/cxl/cxlpci.h
>>> @@ -135,4 +135,12 @@ void read_cdat_data(struct cxl_port *port);
>>>  void cxl_cor_error_detected(struct pci_dev *pdev);
>>>  pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
>>>  				    pci_channel_state_t state);
>>> +
>>> +#ifdef CONFIG_PCIEAER_CXL
>>> +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
>>> +#else
>>> +static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
>>> +						struct device *host) { }
>>> +#endif
>>> +
>>>  #endif /* __CXL_PCI_H__ */
>>> diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
>>> index 31a2d73c963f..54b49d8e6b40 100644
>>> --- a/tools/testing/cxl/Kbuild
>>> +++ b/tools/testing/cxl/Kbuild
>>> @@ -68,6 +68,7 @@ cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
>>>  cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
>>>  cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
>>>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
>>> +cxl_core-$(CONFIG_PCIEAER_CXL) += $(CXL_CORE_SRC)/pci_aer.o
>>>  cxl_core-y += config_check.o
>>>  cxl_core-y += cxl_core_test.o
>>>  cxl_core-y += cxl_core_exports.o
>>>
>>> base-commit: 347e9f5043c89695b01e66b3ed111755afcf1911
>>
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
  2025-07-23 16:38     ` Bowman, Terry
@ 2025-07-23 16:43       ` Dave Jiang
  2025-07-23 18:53         ` dan.j.williams
  0 siblings, 1 reply; 12+ messages in thread
From: Dave Jiang @ 2025-07-23 16:43 UTC (permalink / raw)
  To: Bowman, Terry, linux-cxl
  Cc: dave, jonathan.cameron, alison.schofield, vishal.l.verma,
	ira.weiny, dan.j.williams, Robert Richter



On 7/23/25 9:38 AM, Bowman, Terry wrote:
> On 7/23/2025 10:14 AM, Dave Jiang wrote:
>>
>>
>> On 7/23/25 7:55 AM, Bowman, Terry wrote:
>>> On 7/21/2025 12:04 PM, Dave Jiang wrote:
>>>> Remove from core/pci_aer.c and move the CONFIG_PCIEAER_CXL ifdef to
>>>> cxlpci.h to deal with the exported symbols as needed. There is enough
>>>> AER handling code (and more to come) to move the AER code to its own
>>>> C file.
>>>>
>>>> Cc: Robert Richter <rrichter@amd.com>
>>>> Cc: Terry Bowman <terry.bowman@amd.com>
>>>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>>>> ---
>>>> v2:
>>>> - Address 0-day reprot of cxl_handle_rdport_errors symbol undefined
>>>> ---
>>>>  drivers/cxl/core/Makefile  |   1 +
>>>>  drivers/cxl/core/core.h    |   9 ++
>>>>  drivers/cxl/core/pci.c     | 171 +------------------------------------
>>>>  drivers/cxl/core/pci_aer.c | 168 ++++++++++++++++++++++++++++++++++++
>>>>  drivers/cxl/cxl.h          |   8 --
>>>>  drivers/cxl/cxlpci.h       |   8 ++
>>>>  tools/testing/cxl/Kbuild   |   1 +
>>>>  7 files changed, 189 insertions(+), 177 deletions(-)
>>>>  create mode 100644 drivers/cxl/core/pci_aer.c
>>>>
>>>> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
>>>> index 79e2ef81fde8..bcea856157af 100644
>>>> --- a/drivers/cxl/core/Makefile
>>>> +++ b/drivers/cxl/core/Makefile
>>>> @@ -21,3 +21,4 @@ cxl_core-$(CONFIG_CXL_REGION) += region.o
>>>>  cxl_core-$(CONFIG_CXL_MCE) += mce.o
>>>>  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
>>>>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
>>>> +cxl_core-$(CONFIG_PCIEAER_CXL) += pci_aer.o
>>>> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
>>>> index 29b61828a847..96c319c66a48 100644
>>>> --- a/drivers/cxl/core/core.h
>>>> +++ b/drivers/cxl/core/core.h
>>>> @@ -136,4 +136,13 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
>>>>  		    u16 *return_code);
>>>>  #endif
>>>>  
>>>> +#ifdef CONFIG_PCIEAER_CXL
>>>> +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
>>>> +#else
>>>> +static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) {}
>>>> +#endif
>>>> +
>>>> +void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
>>>> +bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
>>>> +
>>>>  #endif /* __CXL_CORE_H__ */
>>>> diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
>>>> index b50551601c2e..bd78369bc3f4 100644
>>>> --- a/drivers/cxl/core/pci.c
>>>> +++ b/drivers/cxl/core/pci.c
>>>> @@ -664,8 +664,7 @@ void read_cdat_data(struct cxl_port *port)
>>>>  }
>>>>  EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");
>>>>  
>>>> -static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
>>>> -				 void __iomem *ras_base)
>>>> +void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
>>>>  {
>>>>  	void __iomem *addr;
>>>>  	u32 status;
>>>> @@ -707,8 +706,7 @@ static void header_log_copy(void __iomem *ras_base, u32 *log)
>>>>   * Log the state of the RAS status registers and prepare them to log the
>>>>   * next error status. Return 1 if reset needed.
>>>>   */
>>>> -static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
>>>> -				  void __iomem *ras_base)
>>>> +bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base)
>>>>  {
>>>>  	u32 hl[CXL_HEADERLOG_SIZE_U32];
>>>>  	void __iomem *addr;
>>>> @@ -746,171 +744,6 @@ static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
>>>>  	return __cxl_handle_ras(cxlds, cxlds->regs.ras);
>>>>  }
>>>>  
>>>> -#ifdef CONFIG_PCIEAER_CXL
>>>> -
>>>> -static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
>>>> -{
>>>> -	resource_size_t aer_phys;
>>>> -	struct device *host;
>>>> -	u16 aer_cap;
>>>> -
>>>> -	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
>>>> -	if (aer_cap) {
>>>> -		host = dport->reg_map.host;
>>>> -		aer_phys = aer_cap + dport->rcrb.base;
>>>> -		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
>>>> -						sizeof(struct aer_capability_regs));
>>>> -	}
>>>> -}
>>>> -
>>>> -static void cxl_dport_map_ras(struct cxl_dport *dport)
>>>> -{
>>>> -	struct cxl_register_map *map = &dport->reg_map;
>>>> -	struct device *dev = dport->dport_dev;
>>>> -
>>>> -	if (!map->component_map.ras.valid)
>>>> -		dev_dbg(dev, "RAS registers not found\n");
>>>> -	else if (cxl_map_component_regs(map, &dport->regs.component,
>>>> -					BIT(CXL_CM_CAP_CAP_ID_RAS)))
>>>> -		dev_dbg(dev, "Failed to map RAS capability.\n");
>>>> -}
>>>> -
>>>> -static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
>>>> -{
>>>> -	void __iomem *aer_base = dport->regs.dport_aer;
>>>> -	u32 aer_cmd_mask, aer_cmd;
>>>> -
>>>> -	if (!aer_base)
>>>> -		return;
>>>> -
>>>> -	/*
>>>> -	 * Disable RCH root port command interrupts.
>>>> -	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
>>>> -	 *
>>>> -	 * This sequence may not be necessary. CXL spec states disabling
>>>> -	 * the root cmd register's interrupts is required. But, PCI spec
>>>> -	 * shows these are disabled by default on reset.
>>>> -	 */
>>>> -	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
>>>> -			PCI_ERR_ROOT_CMD_NONFATAL_EN |
>>>> -			PCI_ERR_ROOT_CMD_FATAL_EN);
>>>> -	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
>>>> -	aer_cmd &= ~aer_cmd_mask;
>>>> -	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
>>>> -}
>>>> -
>>>> -/**
>>>> - * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
>>>> - * @dport: the cxl_dport that needs to be initialized
>>>> - * @host: host device for devm operations
>>>> - */
>>>> -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
>>>> -{
>>>> -	dport->reg_map.host = host;
>>>> -	cxl_dport_map_ras(dport);
>>>> -
>>>> -	if (dport->rch) {
>>>> -		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
>>>> -
>>>> -		if (!host_bridge->native_aer)
>>>> -			return;
>>>> -
>>>> -		cxl_dport_map_rch_aer(dport);
>>>> -		cxl_disable_rch_root_ints(dport);
>>>> -	}
>>>> -}
>>>> -EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
>>>> -
>>>> -static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
>>>> -					  struct cxl_dport *dport)
>>>> -{
>>>> -	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
>>>> -}
>>>> -
>>>> -static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
>>>> -				       struct cxl_dport *dport)
>>>> -{
>>>> -	return __cxl_handle_ras(cxlds, dport->regs.ras);
>>>> -}
>>>> -
>>>> -/*
>>>> - * Copy the AER capability registers using 32 bit read accesses.
>>>> - * This is necessary because RCRB AER capability is MMIO mapped. Clear the
>>>> - * status after copying.
>>>> - *
>>>> - * @aer_base: base address of AER capability block in RCRB
>>>> - * @aer_regs: destination for copying AER capability
>>>> - */
>>>> -static bool cxl_rch_get_aer_info(void __iomem *aer_base,
>>>> -				 struct aer_capability_regs *aer_regs)
>>>> -{
>>>> -	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
>>>> -	u32 *aer_regs_buf = (u32 *)aer_regs;
>>>> -	int n;
>>>> -
>>>> -	if (!aer_base)
>>>> -		return false;
>>>> -
>>>> -	/* Use readl() to guarantee 32-bit accesses */
>>>> -	for (n = 0; n < read_cnt; n++)
>>>> -		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
>>>> -
>>>> -	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
>>>> -	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
>>>> -
>>>> -	return true;
>>>> -}
>>>> -
>>>> -/* Get AER severity. Return false if there is no error. */
>>>> -static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
>>>> -				     int *severity)
>>>> -{
>>>> -	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
>>>> -		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
>>>> -			*severity = AER_FATAL;
>>>> -		else
>>>> -			*severity = AER_NONFATAL;
>>>> -		return true;
>>>> -	}
>>>> -
>>>> -	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
>>>> -		*severity = AER_CORRECTABLE;
>>>> -		return true;
>>>> -	}
>>>> -
>>>> -	return false;
>>>> -}
>>>> -
>>>> -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
>>>> -{
>>>> -	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
>>>> -	struct aer_capability_regs aer_regs;
>>>> -	struct cxl_dport *dport;
>>>> -	int severity;
>>>> -
>>>> -	struct cxl_port *port __free(put_cxl_port) =
>>>> -		cxl_pci_find_port(pdev, &dport);
>>>> -	if (!port)
>>>> -		return;
>>>> -
>>>> -	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
>>>> -		return;
>>>> -
>>>> -	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
>>>> -		return;
>>>> -
>>>> -	pci_print_aer(pdev, severity, &aer_regs);
>>>> -
>>>> -	if (severity == AER_CORRECTABLE)
>>>> -		cxl_handle_rdport_cor_ras(cxlds, dport);
>>>> -	else
>>>> -		cxl_handle_rdport_ras(cxlds, dport);
>>>> -}
>>>> -
>>>> -#else
>>>> -static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
>>>> -#endif
>>>> -
>>>>  void cxl_cor_error_detected(struct pci_dev *pdev)
>>>>  {
>>>>  	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
>>>> diff --git a/drivers/cxl/core/pci_aer.c b/drivers/cxl/core/pci_aer.c
>>>> new file mode 100644
>>>> index 000000000000..cf1e9377752e
>>>> --- /dev/null
>>>> +++ b/drivers/cxl/core/pci_aer.c
>>>
>>>
>>>
>>> Hi Dave,
>>>
>>> While adding this into my series and looking closer, I believe that pci_aer.c filename does not accurately reflect its content. 
>>> The only AER-related code pertains to CX1.1, which is now a corner case. The AER driver, not CXL driver, logs AER errors for 
>>> all other scenarios. There is no AER processing in the CXL driver except for CXL1.1 dports.
>>>
>>> Most of the code in this file is related to or will be directed towards CXL RAS. It's important to note that AER and 
>>> RAS are distinct and separate registers, with CXL RAS only utilizing the AER interrupt notification and UIE/CIE. 
>>>
>>> I am concerned we are conflating AER and RAS, which may be misleading for others. We have other instances of this 
>>> confusion, such as in the cxl_aer_correctable_error() and cxl_aer_uncorrectable_error() functions, which log RAS instead of AER.
>>>
>>> My series aims to clarify the distinction between AER and CXL errors, categorizing CXL errors as a separate error class. 
>>> At the least, can we add a documentation block at the top to indicate this source file provides functionality for 
>>> CXL RAS?
>>
>> Sure we can do that. Also, we have a core/ras.c already. Any suggestions on how do we want improve this? Move things to ras.c? Rename cxl_aer.c to something else? Rename current ras.c to something else?
>>
>>>
> 
> I don't think we want to combine the os-first and fw-first code in a single file. The separation leveraging the 
> Makefile conditional compile is good because fw-first doesn't use PCIEAER_CXL while os-first does.
> I think core/ras.c is good as-is.
> 
> cxl_aer.c is in the drivers/pci/pcie directory and is logically part of the AER service driver. This file includes the 
> kfifo and logic for offloading CXL RAS errors to the CXL driver. I think the cxl_aer.c naming is fine but 
> understand it could be argued to use cxl_ras.c instead. My concern was in the CXL driver AER-RAS naming but 
> the same rules could be applied here as well. 
> 
> To improve the original situation mentioned I recommend we use 'ras' in what is named pci_aer.c here? 
> Maybe 'pci_ras.c' instead? It's not perfect but would be more precise. This feels somewhat nitpicky on my part 
> and will not be an issue if we leave as-is. Thanks for considering.

Thanks for the detailed explanation. I'll rename it to pci_ras.c.

> 
> -Terry
> 
>>> -Terry
>>>
>>>> @@ -0,0 +1,168 @@
>>>> +// SPDX-License-Identifier: GPL-2.0-only
>>>> +/* Copyright(c) 2025 AMD Corporation. All rights reserved. */
>>>> +/* Copyright(c) 2025 Intel Corporation. All rights reserved. */
>>>> +#include <linux/pci.h>
>>>> +#include <linux/aer.h>
>>>> +#include <cxlpci.h>
>>>> +#include <cxlmem.h>
>>>> +#include <cxl.h>
>>>> +#include "core.h"
>>>> +
>>>> +static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
>>>> +{
>>>> +	resource_size_t aer_phys;
>>>> +	struct device *host;
>>>> +	u16 aer_cap;
>>>> +
>>>> +	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
>>>> +	if (aer_cap) {
>>>> +		host = dport->reg_map.host;
>>>> +		aer_phys = aer_cap + dport->rcrb.base;
>>>> +		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
>>>> +						sizeof(struct aer_capability_regs));
>>>> +	}
>>>> +}
>>>> +
>>>> +static void cxl_dport_map_ras(struct cxl_dport *dport)
>>>> +{
>>>> +	struct cxl_register_map *map = &dport->reg_map;
>>>> +	struct device *dev = dport->dport_dev;
>>>> +
>>>> +	if (!map->component_map.ras.valid)
>>>> +		dev_dbg(dev, "RAS registers not found\n");
>>>> +	else if (cxl_map_component_regs(map, &dport->regs.component,
>>>> +					BIT(CXL_CM_CAP_CAP_ID_RAS)))
>>>> +		dev_dbg(dev, "Failed to map RAS capability.\n");
>>>> +}
>>>> +
>>>> +static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
>>>> +{
>>>> +	void __iomem *aer_base = dport->regs.dport_aer;
>>>> +	u32 aer_cmd_mask, aer_cmd;
>>>> +
>>>> +	if (!aer_base)
>>>> +		return;
>>>> +
>>>> +	/*
>>>> +	 * Disable RCH root port command interrupts.
>>>> +	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
>>>> +	 *
>>>> +	 * This sequence may not be necessary. CXL spec states disabling
>>>> +	 * the root cmd register's interrupts is required. But, PCI spec
>>>> +	 * shows these are disabled by default on reset.
>>>> +	 */
>>>> +	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
>>>> +			PCI_ERR_ROOT_CMD_NONFATAL_EN |
>>>> +			PCI_ERR_ROOT_CMD_FATAL_EN);
>>>> +	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
>>>> +	aer_cmd &= ~aer_cmd_mask;
>>>> +	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
>>>> +}
>>>> +
>>>> +/**
>>>> + * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
>>>> + * @dport: the cxl_dport that needs to be initialized
>>>> + * @host: host device for devm operations
>>>> + */
>>>> +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
>>>> +{
>>>> +	dport->reg_map.host = host;
>>>> +	cxl_dport_map_ras(dport);
>>>> +
>>>> +	if (dport->rch) {
>>>> +		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);
>>>> +
>>>> +		if (!host_bridge->native_aer)
>>>> +			return;
>>>> +
>>>> +		cxl_dport_map_rch_aer(dport);
>>>> +		cxl_disable_rch_root_ints(dport);
>>>> +	}
>>>> +}
>>>> +EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
>>>> +
>>>> +static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
>>>> +					  struct cxl_dport *dport)
>>>> +{
>>>> +	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
>>>> +}
>>>> +
>>>> +static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
>>>> +				       struct cxl_dport *dport)
>>>> +{
>>>> +	return __cxl_handle_ras(cxlds, dport->regs.ras);
>>>> +}
>>>> +
>>>> +/*
>>>> + * Copy the AER capability registers using 32 bit read accesses.
>>>> + * This is necessary because RCRB AER capability is MMIO mapped. Clear the
>>>> + * status after copying.
>>>> + *
>>>> + * @aer_base: base address of AER capability block in RCRB
>>>> + * @aer_regs: destination for copying AER capability
>>>> + */
>>>> +static bool cxl_rch_get_aer_info(void __iomem *aer_base,
>>>> +				 struct aer_capability_regs *aer_regs)
>>>> +{
>>>> +	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
>>>> +	u32 *aer_regs_buf = (u32 *)aer_regs;
>>>> +	int n;
>>>> +
>>>> +	if (!aer_base)
>>>> +		return false;
>>>> +
>>>> +	/* Use readl() to guarantee 32-bit accesses */
>>>> +	for (n = 0; n < read_cnt; n++)
>>>> +		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));
>>>> +
>>>> +	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
>>>> +	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);
>>>> +
>>>> +	return true;
>>>> +}
>>>> +
>>>> +/* Get AER severity. Return false if there is no error. */
>>>> +static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
>>>> +				     int *severity)
>>>> +{
>>>> +	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
>>>> +		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
>>>> +			*severity = AER_FATAL;
>>>> +		else
>>>> +			*severity = AER_NONFATAL;
>>>> +		return true;
>>>> +	}
>>>> +
>>>> +	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
>>>> +		*severity = AER_CORRECTABLE;
>>>> +		return true;
>>>> +	}
>>>> +
>>>> +	return false;
>>>> +}
>>>> +
>>>> +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
>>>> +{
>>>> +	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
>>>> +	struct aer_capability_regs aer_regs;
>>>> +	struct cxl_dport *dport;
>>>> +	int severity;
>>>> +
>>>> +	struct cxl_port *port __free(put_cxl_port) =
>>>> +		cxl_pci_find_port(pdev, &dport);
>>>> +	if (!port)
>>>> +		return;
>>>> +
>>>> +	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
>>>> +		return;
>>>> +
>>>> +	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
>>>> +		return;
>>>> +
>>>> +	pci_print_aer(pdev, severity, &aer_regs);
>>>> +
>>>> +	if (severity == AER_CORRECTABLE)
>>>> +		cxl_handle_rdport_cor_ras(cxlds, dport);
>>>> +	else
>>>> +		cxl_handle_rdport_ras(cxlds, dport);
>>>> +}
>>>> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
>>>> index 3f1695c96abc..73397da7f970 100644
>>>> --- a/drivers/cxl/cxl.h
>>>> +++ b/drivers/cxl/cxl.h
>>>> @@ -759,14 +759,6 @@ struct cxl_dport *devm_cxl_add_rch_dport(struct cxl_port *port,
>>>>  					 struct device *dport_dev, int port_id,
>>>>  					 resource_size_t rcrb);
>>>>  
>>>> -#ifdef CONFIG_PCIEAER_CXL
>>>> -void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport);
>>>> -void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
>>>> -#else
>>>> -static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
>>>> -						struct device *host) { }
>>>> -#endif
>>>> -
>>>>  struct cxl_decoder *to_cxl_decoder(struct device *dev);
>>>>  struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev);
>>>>  struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
>>>> diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
>>>> index 54e219b0049e..5f73ce42a758 100644
>>>> --- a/drivers/cxl/cxlpci.h
>>>> +++ b/drivers/cxl/cxlpci.h
>>>> @@ -135,4 +135,12 @@ void read_cdat_data(struct cxl_port *port);
>>>>  void cxl_cor_error_detected(struct pci_dev *pdev);
>>>>  pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
>>>>  				    pci_channel_state_t state);
>>>> +
>>>> +#ifdef CONFIG_PCIEAER_CXL
>>>> +void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host);
>>>> +#else
>>>> +static inline void cxl_dport_init_ras_reporting(struct cxl_dport *dport,
>>>> +						struct device *host) { }
>>>> +#endif
>>>> +
>>>>  #endif /* __CXL_PCI_H__ */
>>>> diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
>>>> index 31a2d73c963f..54b49d8e6b40 100644
>>>> --- a/tools/testing/cxl/Kbuild
>>>> +++ b/tools/testing/cxl/Kbuild
>>>> @@ -68,6 +68,7 @@ cxl_core-$(CONFIG_CXL_REGION) += $(CXL_CORE_SRC)/region.o
>>>>  cxl_core-$(CONFIG_CXL_MCE) += $(CXL_CORE_SRC)/mce.o
>>>>  cxl_core-$(CONFIG_CXL_FEATURES) += $(CXL_CORE_SRC)/features.o
>>>>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += $(CXL_CORE_SRC)/edac.o
>>>> +cxl_core-$(CONFIG_PCIEAER_CXL) += $(CXL_CORE_SRC)/pci_aer.o
>>>>  cxl_core-y += config_check.o
>>>>  cxl_core-y += cxl_core_test.o
>>>>  cxl_core-y += cxl_core_exports.o
>>>>
>>>> base-commit: 347e9f5043c89695b01e66b3ed111755afcf1911
>>>
>>
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
  2025-07-23 16:43       ` Dave Jiang
@ 2025-07-23 18:53         ` dan.j.williams
  2025-07-23 22:14           ` Dave Jiang
  0 siblings, 1 reply; 12+ messages in thread
From: dan.j.williams @ 2025-07-23 18:53 UTC (permalink / raw)
  To: Dave Jiang, Bowman, Terry, linux-cxl
  Cc: dave, jonathan.cameron, alison.schofield, vishal.l.verma,
	ira.weiny, dan.j.williams, Robert Richter

Dave Jiang wrote:
[..]
> > I don't think we want to combine the os-first and fw-first code in a
> > single file. The separation leveraging the Makefile conditional
> > compile is good because fw-first doesn't use PCIEAER_CXL while
> > os-first does.  I think core/ras.c is good as-is.
> > 
> > cxl_aer.c is in the drivers/pci/pcie directory and is logically part
> > of the AER service driver. This file includes the kfifo and logic
> > for offloading CXL RAS errors to the CXL driver. I think the
> > cxl_aer.c naming is fine but understand it could be argued to use
> > cxl_ras.c instead. My concern was in the CXL driver AER-RAS naming
> > but the same rules could be applied here as well. 
> > 
> > To improve the original situation mentioned I recommend we use 'ras'
> > in what is named pci_aer.c here?  Maybe 'pci_ras.c' instead? It's
> > not perfect but would be more precise. This feels somewhat nitpicky
> > on my part and will not be an issue if we leave as-is. Thanks for
> > considering.
> 
> Thanks for the detailed explanation. I'll rename it to pci_ras.c.

Hey guys, please trim your responses.

Between RAS, AER, and EDAC there is significant overlap. You could say
that pci_ras.c is also misnamed because the error details being reported
are CXL not PCI. I am ok with "RAS" for anything that handles decorating
an AER notification with extra information, and "EDAC" for all the other
error notification / reporting / and management flows that handled
separately from AER.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
  2025-07-21 17:04 [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c Dave Jiang
                   ` (2 preceding siblings ...)
  2025-07-23 14:55 ` Bowman, Terry
@ 2025-07-23 21:35 ` Bowman, Terry
  2025-07-23 22:11   ` Dave Jiang
  3 siblings, 1 reply; 12+ messages in thread
From: Bowman, Terry @ 2025-07-23 21:35 UTC (permalink / raw)
  To: Dave Jiang, linux-cxl
  Cc: dave, jonathan.cameron, alison.schofield, vishal.l.verma,
	ira.weiny, dan.j.williams, Robert Richter

On 7/21/2025 12:04 PM, Dave Jiang wrote:
> Remove from core/pci_aer.c and move the CONFIG_PCIEAER_CXL ifdef to
> cxlpci.h to deal with the exported symbols as needed. There is enough
> AER handling code (and more to come) to move the AER code to its own
> C file.
> 
> Cc: Robert Richter <rrichter@amd.com>
> Cc: Terry Bowman <terry.bowman@amd.com>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> ---
> v2:
> - Address 0-day reprot of cxl_handle_rdport_errors symbol undefined
> ---
>  drivers/cxl/core/Makefile  |   1 +
>  drivers/cxl/core/core.h    |   9 ++
>  drivers/cxl/core/pci.c     | 171 +------------------------------------
>  drivers/cxl/core/pci_aer.c | 168 ++++++++++++++++++++++++++++++++++++
>  drivers/cxl/cxl.h          |   8 --
>  drivers/cxl/cxlpci.h       |   8 ++
>  tools/testing/cxl/Kbuild   |   1 +
>  7 files changed, 189 insertions(+), 177 deletions(-)
>  create mode 100644 drivers/cxl/core/pci_aer.c
> 
> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> index 79e2ef81fde8..bcea856157af 100644
> --- a/drivers/cxl/core/Makefile
> +++ b/drivers/cxl/core/Makefile
> @@ -21,3 +21,4 @@ cxl_core-$(CONFIG_CXL_REGION) += region.o
>  cxl_core-$(CONFIG_CXL_MCE) += mce.o
>  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
> +cxl_core-$(CONFIG_PCIEAER_CXL) += pci_aer.o
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 29b61828a847..96c319c66a48 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -136,4 +136,13 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
>  		    u16 *return_code);
>  #endif
>  
> +#ifdef CONFIG_PCIEAER_CXL
> +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
> +#else
> +static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) {}
> +#endif
> +
> +void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
> +bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
> +
>  #endif /* __CXL_CORE_H__ */

Hi Dave,

Any reason not to pull in cxl_handle_cor_ras()/cxl_handle_ras() into pci_ras.c?
These 2 functions read the RAS from the register block and then call the trace routine.
These are only called by the handlers in pci.c. I think it might round-out your changes 
nicely if they are moved into pci_ras.c s.t. when CONFIG_PCIEAER_CXL is disabled then 
they are do nothing. Or another way to look at it is they should only be enabled if 
CONFIG_PCIEAER_CXL is enabled.

-Terry

[snip]


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
  2025-07-23 21:35 ` Bowman, Terry
@ 2025-07-23 22:11   ` Dave Jiang
  0 siblings, 0 replies; 12+ messages in thread
From: Dave Jiang @ 2025-07-23 22:11 UTC (permalink / raw)
  To: Bowman, Terry, linux-cxl
  Cc: dave, jonathan.cameron, alison.schofield, vishal.l.verma,
	ira.weiny, dan.j.williams, Robert Richter



On 7/23/25 2:35 PM, Bowman, Terry wrote:
> On 7/21/2025 12:04 PM, Dave Jiang wrote:
>> Remove from core/pci_aer.c and move the CONFIG_PCIEAER_CXL ifdef to
>> cxlpci.h to deal with the exported symbols as needed. There is enough
>> AER handling code (and more to come) to move the AER code to its own
>> C file.
>>
>> Cc: Robert Richter <rrichter@amd.com>
>> Cc: Terry Bowman <terry.bowman@amd.com>
>> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
>> ---
>> v2:
>> - Address 0-day reprot of cxl_handle_rdport_errors symbol undefined
>> ---
>>  drivers/cxl/core/Makefile  |   1 +
>>  drivers/cxl/core/core.h    |   9 ++
>>  drivers/cxl/core/pci.c     | 171 +------------------------------------
>>  drivers/cxl/core/pci_aer.c | 168 ++++++++++++++++++++++++++++++++++++
>>  drivers/cxl/cxl.h          |   8 --
>>  drivers/cxl/cxlpci.h       |   8 ++
>>  tools/testing/cxl/Kbuild   |   1 +
>>  7 files changed, 189 insertions(+), 177 deletions(-)
>>  create mode 100644 drivers/cxl/core/pci_aer.c
>>
>> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
>> index 79e2ef81fde8..bcea856157af 100644
>> --- a/drivers/cxl/core/Makefile
>> +++ b/drivers/cxl/core/Makefile
>> @@ -21,3 +21,4 @@ cxl_core-$(CONFIG_CXL_REGION) += region.o
>>  cxl_core-$(CONFIG_CXL_MCE) += mce.o
>>  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
>>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
>> +cxl_core-$(CONFIG_PCIEAER_CXL) += pci_aer.o
>> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
>> index 29b61828a847..96c319c66a48 100644
>> --- a/drivers/cxl/core/core.h
>> +++ b/drivers/cxl/core/core.h
>> @@ -136,4 +136,13 @@ int cxl_set_feature(struct cxl_mailbox *cxl_mbox, const uuid_t *feat_uuid,
>>  		    u16 *return_code);
>>  #endif
>>  
>> +#ifdef CONFIG_PCIEAER_CXL
>> +void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds);
>> +#else
>> +static inline void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) {}
>> +#endif
>> +
>> +void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
>> +bool __cxl_handle_ras(struct cxl_dev_state *cxlds, void __iomem *ras_base);
>> +
>>  #endif /* __CXL_CORE_H__ */
> 
> Hi Dave,
> 
> Any reason not to pull in cxl_handle_cor_ras()/cxl_handle_ras() into pci_ras.c?
> These 2 functions read the RAS from the register block and then call the trace routine.
> These are only called by the handlers in pci.c. I think it might round-out your changes 
> nicely if they are moved into pci_ras.c s.t. when CONFIG_PCIEAER_CXL is disabled then 
> they are do nothing. Or another way to look at it is they should only be enabled if 
> CONFIG_PCIEAER_CXL is enabled.
> 

Missed them. Yes I can do that.

> -Terry
> 
> [snip]
> 
> 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
  2025-07-23 18:53         ` dan.j.williams
@ 2025-07-23 22:14           ` Dave Jiang
  2025-07-24 22:08             ` dan.j.williams
  0 siblings, 1 reply; 12+ messages in thread
From: Dave Jiang @ 2025-07-23 22:14 UTC (permalink / raw)
  To: dan.j.williams, Bowman, Terry, linux-cxl
  Cc: dave, jonathan.cameron, alison.schofield, vishal.l.verma,
	ira.weiny, Robert Richter



On 7/23/25 11:53 AM, dan.j.williams@intel.com wrote:
> Dave Jiang wrote:
> [..]
>>> I don't think we want to combine the os-first and fw-first code in a
>>> single file. The separation leveraging the Makefile conditional
>>> compile is good because fw-first doesn't use PCIEAER_CXL while
>>> os-first does.  I think core/ras.c is good as-is.
>>>
>>> cxl_aer.c is in the drivers/pci/pcie directory and is logically part
>>> of the AER service driver. This file includes the kfifo and logic
>>> for offloading CXL RAS errors to the CXL driver. I think the
>>> cxl_aer.c naming is fine but understand it could be argued to use
>>> cxl_ras.c instead. My concern was in the CXL driver AER-RAS naming
>>> but the same rules could be applied here as well. 
>>>
>>> To improve the original situation mentioned I recommend we use 'ras'
>>> in what is named pci_aer.c here?  Maybe 'pci_ras.c' instead? It's
>>> not perfect but would be more precise. This feels somewhat nitpicky
>>> on my part and will not be an issue if we leave as-is. Thanks for
>>> considering.
>>
>> Thanks for the detailed explanation. I'll rename it to pci_ras.c.
> 
> Hey guys, please trim your responses.
> 
> Between RAS, AER, and EDAC there is significant overlap. You could say
> that pci_ras.c is also misnamed because the error details being reported
> are CXL not PCI. I am ok with "RAS" for anything that handles decorating
> an AER notification with extra information, and "EDAC" for all the other
> error notification / reporting / and management flows that handled
> separately from AER.

I wonder if we should rename ras.c to cper.c and move the new code to ras.c.... or cxl_ras.c.


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c
  2025-07-23 22:14           ` Dave Jiang
@ 2025-07-24 22:08             ` dan.j.williams
  0 siblings, 0 replies; 12+ messages in thread
From: dan.j.williams @ 2025-07-24 22:08 UTC (permalink / raw)
  To: Dave Jiang, dan.j.williams, Bowman, Terry, linux-cxl
  Cc: dave, jonathan.cameron, alison.schofield, vishal.l.verma,
	ira.weiny, Robert Richter

Dave Jiang wrote:
[..]
> > Between RAS, AER, and EDAC there is significant overlap. You could say
> > that pci_ras.c is also misnamed because the error details being reported
> > are CXL not PCI. I am ok with "RAS" for anything that handles decorating
> > an AER notification with extra information, and "EDAC" for all the other
> > error notification / reporting / and management flows that handled
> > separately from AER.
> 
> I wonder if we should rename ras.c to cper.c and move the new code to ras.c.... or cxl_ras.c.

In the protocol error thread my ask was to just colocate all this error
handling code in ras.c [1]. No real value I can see to do fine grained
separation of these concerns. CPER notifications will eventually want to
trigger a cxl_port walk, so keep it all together unless someone can
point to a good reason to split them up.

[1]: http://lore.kernel.org/68829db3d3e63_134cc710080@dwillia2-xfh.jf.intel.com.notmuch

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2025-07-24 22:08 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-21 17:04 [PATCH v2] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c Dave Jiang
2025-07-22 10:57 ` Jonathan Cameron
2025-07-22 14:08 ` Joshua Hahn
2025-07-23 14:55 ` Bowman, Terry
2025-07-23 15:14   ` Dave Jiang
2025-07-23 16:38     ` Bowman, Terry
2025-07-23 16:43       ` Dave Jiang
2025-07-23 18:53         ` dan.j.williams
2025-07-23 22:14           ` Dave Jiang
2025-07-24 22:08             ` dan.j.williams
2025-07-23 21:35 ` Bowman, Terry
2025-07-23 22:11   ` Dave Jiang

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).