public inbox for linux-pci@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v13 24/25] CXL/PCI: Enable CXL protocol errors during CXL Port probe
@ 2025-11-04  0:29 Terry Bowman
  2025-11-04  0:29 ` [PATCH v13 25/25] CXL/PCI: Disable CXL protocol error interrupts during CXL Port cleanup Terry Bowman
  0 siblings, 1 reply; 2+ messages in thread
From: Terry Bowman @ 2025-11-04  0:29 UTC (permalink / raw)
  To: dave, jonathan.cameron, dave.jiang, alison.schofield,
	dan.j.williams, bhelgaas, shiju.jose, ming.li,
	Smita.KoralahalliChannabasappa, rrichter, dan.carpenter,
	PradeepVineshReddy.Kodamati, lukas, Benjamin.Cheatham,
	sathyanarayanan.kuppuswamy, linux-cxl, alucerop, ira.weiny
  Cc: linux-kernel, linux-pci, terry.bowman

CXL protocol errors are not enabled for all CXL devices after boot. These
must be enabled inorder to process CXL protocol errors.

Introduce cxl_unmask_proto_interrupts() to call pci_aer_unmask_internal_errors().
pci_aer_unmask_internal_errors() expects the pdev->aer_cap is initialized.
But, dev->aer_cap is not initialized for CXL Upstream Switch Ports and CXL
Downstream Switch Ports. Initialize the dev->aer_cap if necessary. Enable AER
correctable internal errors and uncorrectable internal errors for all CXL
devices.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ben Cheatham <benjamin.cheatham@amd.com>

---

Changes in v12->v13:
- Add dev and dev_is_pci() NULL checks in cxl_unmask_proto_interrupts() (Terry)
- Add Dave Jiang's and Ben's review-by

Changes in v11->v12:
- None

Changes in v10->v11:
- Added check for valid PCI devices in is_cxl_error() (Terry)
- Removed check for RCiEP in cxl_handle_proto_err() and
  cxl_report_error_detected() (Terry)
---
 drivers/cxl/core/core.h |  4 ++++
 drivers/cxl/core/port.c |  4 ++++
 drivers/cxl/core/ras.c  | 26 +++++++++++++++++++++++++-
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 046ec65ed147..a7a0838c8f23 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -159,6 +159,8 @@ pci_ers_result_t pci_error_detected(struct pci_dev *pdev,
 void pci_cor_error_detected(struct pci_dev *pdev);
 pci_ers_result_t cxl_port_error_detected(struct device *dev);
 void cxl_port_cor_error_detected(struct device *dev);
+void cxl_mask_proto_interrupts(struct device *dev);
+void cxl_unmask_proto_interrupts(struct device *dev);
 #else
 static inline int cxl_ras_init(void)
 {
@@ -183,6 +185,8 @@ static inline pci_ers_result_t cxl_port_error_detected(struct device *dev)
 {
 	return PCI_ERS_RESULT_NONE;
 }
+static inline void cxl_unmask_proto_interrupts(struct device *dev) { }
+static inline void cxl_mask_proto_interrupts(struct device *dev) { }
 #endif /* CONFIG_CXL_RAS */
 
 /* Restricted CXL Host specific RAS functions */
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index d060f864cf2e..a23c742eb670 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1747,6 +1747,8 @@ static int add_port_attach_ep(struct cxl_memdev *cxlmd,
 		rc = -ENXIO;
 	}
 
+	cxl_unmask_proto_interrupts(cxlmd->cxlds->dev);
+
 	return rc;
 }
 
@@ -1833,6 +1835,8 @@ int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
 
 			rc = cxl_add_ep(dport, &cxlmd->dev);
 
+			cxl_unmask_proto_interrupts(cxlmd->cxlds->dev);
+
 			/*
 			 * If the endpoint already exists in the port's list,
 			 * that's ok, it was added on a previous pass.
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 52c6f19564b6..101e55723785 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -122,6 +122,23 @@ static bool is_pcie_endpoint(struct pci_dev *pdev)
 	return pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT;
 }
 
+void cxl_unmask_proto_interrupts(struct device *dev)
+{
+	if (!dev || !dev_is_pci(dev))
+		return;
+
+	struct pci_dev *pdev __free(pci_dev_put) = pci_dev_get(to_pci_dev(dev));
+
+	if (!pdev->aer_cap) {
+		pdev->aer_cap = pci_find_ext_capability(pdev,
+							PCI_EXT_CAP_ID_ERR);
+		if (!pdev->aer_cap)
+			return;
+	}
+
+	pci_aer_unmask_internal_errors(pdev);
+}
+
 static void cxl_dport_map_ras(struct cxl_dport *dport)
 {
 	struct cxl_register_map *map = &dport->reg_map;
@@ -230,7 +247,10 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
 
 		cxl_dport_map_rch_aer(dport);
 		cxl_disable_rch_root_ints(dport);
+		return;
 	}
+
+	cxl_unmask_proto_interrupts(dport->dport_dev);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
 
@@ -241,8 +261,12 @@ void cxl_uport_init_ras_reporting(struct cxl_port *port,
 
 	map->host = host;
 	if (cxl_map_component_regs(map, &port->uport_regs,
-				   BIT(CXL_CM_CAP_CAP_ID_RAS)))
+				   BIT(CXL_CM_CAP_CAP_ID_RAS))) {
 		dev_dbg(&port->dev, "Failed to map RAS capability\n");
+		return;
+	}
+
+	cxl_unmask_proto_interrupts(port->uport_dev);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_uport_init_ras_reporting, "CXL");
 
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH v13 25/25] CXL/PCI: Disable CXL protocol error interrupts during CXL Port cleanup
  2025-11-04  0:29 [PATCH v13 24/25] CXL/PCI: Enable CXL protocol errors during CXL Port probe Terry Bowman
@ 2025-11-04  0:29 ` Terry Bowman
  0 siblings, 0 replies; 2+ messages in thread
From: Terry Bowman @ 2025-11-04  0:29 UTC (permalink / raw)
  To: dave, jonathan.cameron, dave.jiang, alison.schofield,
	dan.j.williams, bhelgaas, shiju.jose, ming.li,
	Smita.KoralahalliChannabasappa, rrichter, dan.carpenter,
	PradeepVineshReddy.Kodamati, lukas, Benjamin.Cheatham,
	sathyanarayanan.kuppuswamy, linux-cxl, alucerop, ira.weiny
  Cc: linux-kernel, linux-pci, terry.bowman

During CXL device cleanup the CXL PCIe Port device interrupts remain
enabled. This potentially allows unnecessary interrupt processing on
behalf of the CXL errors while the device is destroyed.

Disable CXL protocol errors by setting the CXL devices' AER mask register.

Introduce pci_aer_mask_internal_errors() similar to pci_aer_unmask_internal_errors().
Add to the AER service driver allowing other subsystems to use.

Introduce cxl_mask_proto_interrupts() to call pci_aer_mask_internal_errors().
Add calls to cxl_mask_proto_interrupts() within CXL Port teardown for CXL
Root Ports, CXL Downstream Switch Ports, CXL Upstream Switch Ports, and CXL
Endpoints. Follow the same "bottom-up" approach used during CXL Port
teardown.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com>

---

Changes in v12->v13:
- Added dev and dev_is_pci() checks in cxl_mask_proto_interrupts() (Terry)

Changes in v11->v12:
- Keep pci_aer_mask_internal_errors() in driver/pci/pcie/aer.c (Lukas)
- Update commit description for pci_aer_mask_internal_errors()
- Add check `if (port->parent_dport)` in delete_switch_port() (Terry)

Changes in v10->v11:
- Removed guard() cxl_mask_proto_interrupts(). RP was blocking during
  testing. (Terry)
---
 drivers/cxl/core/port.c | 10 +++++++++-
 drivers/cxl/core/ras.c  | 10 ++++++++++
 drivers/pci/pcie/aer.c  | 21 +++++++++++++++++++++
 include/linux/aer.h     |  2 ++
 4 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index a23c742eb670..d19ebf052d76 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1437,6 +1437,10 @@ EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, "CXL");
  */
 static void delete_switch_port(struct cxl_port *port)
 {
+	cxl_mask_proto_interrupts(port->uport_dev);
+	if (port->parent_dport)
+		cxl_mask_proto_interrupts(port->parent_dport->dport_dev);
+
 	devm_release_action(port->dev.parent, cxl_unlink_parent_dport, port);
 	devm_release_action(port->dev.parent, cxl_unlink_uport, port);
 	devm_release_action(port->dev.parent, unregister_port, port);
@@ -1458,8 +1462,10 @@ static void del_dports(struct cxl_port *port)
 
 	device_lock_assert(&port->dev);
 
-	xa_for_each(&port->dports, index, dport)
+	xa_for_each(&port->dports, index, dport) {
+		cxl_mask_proto_interrupts(dport->dport_dev);
 		del_dport(dport);
+	}
 }
 
 struct detach_ctx {
@@ -1486,6 +1492,8 @@ static void cxl_detach_ep(void *data)
 {
 	struct cxl_memdev *cxlmd = data;
 
+	cxl_mask_proto_interrupts(cxlmd->cxlds->dev);
+
 	for (int i = cxlmd->depth - 1; i >= 1; i--) {
 		struct cxl_port *port, *parent_port;
 		struct detach_ctx ctx = {
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 101e55723785..6dccbe66c9ac 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -139,6 +139,16 @@ void cxl_unmask_proto_interrupts(struct device *dev)
 	pci_aer_unmask_internal_errors(pdev);
 }
 
+void cxl_mask_proto_interrupts(struct device *dev)
+{
+	if (!dev || !dev_is_pci(dev))
+		return;
+
+	struct pci_dev *pdev __free(pci_dev_put) = pci_dev_get(to_pci_dev(dev));
+
+	pci_aer_mask_internal_errors(pdev);
+}
+
 static void cxl_dport_map_ras(struct cxl_dport *dport)
 {
 	struct cxl_register_map *map = &dport->reg_map;
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 4cf44297bb24..fcc2f43c3383 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1149,6 +1149,27 @@ void pci_aer_unmask_internal_errors(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_GPL(pci_aer_unmask_internal_errors);
 
+/**
+ * pci_aer_mask_internal_errors - mask internal errors
+ * @dev: pointer to the pcie_dev data structure
+ *
+ * Masks internal errors in the Uncorrectable and Correctable Error
+ * Mask registers.
+ *
+ * Note: AER must be enabled and supported by the device which must be
+ * checked in advance, e.g. with pcie_aer_is_native().
+ */
+void pci_aer_mask_internal_errors(struct pci_dev *dev)
+{
+	int aer = dev->aer_cap;
+
+	pci_clear_and_set_config_dword(dev, aer + PCI_ERR_UNCOR_MASK,
+				       0, PCI_ERR_UNC_INTN);
+	pci_clear_and_set_config_dword(dev, aer + PCI_ERR_COR_MASK,
+				       0, PCI_ERR_COR_INTERNAL);
+}
+EXPORT_SYMBOL_GPL(pci_aer_mask_internal_errors);
+
 /**
  * pci_aer_handle_error - handle logging error into an event log
  * @dev: pointer to pci_dev data structure of error source device
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 64aef69fb546..2b89bd940ac1 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -69,6 +69,7 @@ int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
 void pci_aer_clear_fatal_status(struct pci_dev *dev);
 int pcie_aer_is_native(struct pci_dev *dev);
 void pci_aer_unmask_internal_errors(struct pci_dev *dev);
+void pci_aer_mask_internal_errors(struct pci_dev *dev);
 #else
 static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
 {
@@ -77,6 +78,7 @@ static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
 static inline void pci_aer_clear_fatal_status(struct pci_dev *dev) { }
 static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; }
 static inline void pci_aer_unmask_internal_errors(struct pci_dev *dev) { }
+static inline void pci_aer_mask_internal_errors(struct pci_dev *dev) { }
 #endif
 
 #ifdef CONFIG_CXL_RAS
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2025-11-04  0:29 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-11-04  0:29 [PATCH v13 24/25] CXL/PCI: Enable CXL protocol errors during CXL Port probe Terry Bowman
2025-11-04  0:29 ` [PATCH v13 25/25] CXL/PCI: Disable CXL protocol error interrupts during CXL Port cleanup Terry Bowman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox