Linux CXL
 help / color / mirror / Atom feed
From: Terry Bowman <terry.bowman@amd.com>
To: <linux-cxl@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<linux-pci@vger.kernel.org>, <nifan.cxl@gmail.com>,
	<dave@stgolabs.net>, <jonathan.cameron@huawei.com>,
	<dave.jiang@intel.com>, <alison.schofield@intel.com>,
	<vishal.l.verma@intel.com>, <dan.j.williams@intel.com>,
	<bhelgaas@google.com>, <mahesh@linux.ibm.com>,
	<ira.weiny@intel.com>, <oohall@gmail.com>,
	<Benjamin.Cheatham@amd.com>, <rrichter@amd.com>,
	<nathan.fontenot@amd.com>, <terry.bowman@amd.com>,
	<Smita.KoralahalliChannabasappa@amd.com>, <lukas@wunner.de>,
	<ming.li@zohomail.com>, <PradeepVineshReddy.Kodamati@amd.com>
Subject: [PATCH v8 16/16] CXL/PCI: Disable CXL protocol errors during CXL Port cleanup
Date: Wed, 26 Mar 2025 20:47:17 -0500	[thread overview]
Message-ID: <20250327014717.2988633-17-terry.bowman@amd.com> (raw)
In-Reply-To: <20250327014717.2988633-1-terry.bowman@amd.com>

During CXL device cleanup the CXL PCIe Port device interrupts may remain
enabled. This can potentially allow unnecessary interrupt processing on
behalf of the CXL errors while the device is being destroyed.

Disable CXL protocol errors by setting the internal error bits in the CXL
device's AER Uncorrectable and Correctable Error Mask registers.

Introduce pci_aer_mask_internal_errors() similar to pci_aer_unmask_internal_errors().

Next, introduce cxl_disable_prot_errors() to call pci_aer_mask_internal_errors().
Register cxl_disable_prot_errors() to run at CXL device cleanup.
Register for CXL Root Ports, CXL Downstream Ports, CXL Upstream Ports, and
CXL Endpoints.

Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
 drivers/cxl/port.c     | 18 +++++++++++++++++-
 drivers/pci/pcie/aer.c | 25 +++++++++++++++++++++++++
 include/linux/aer.h    |  1 +
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index bb7a0526e609..7e3efd8be8eb 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -101,6 +101,19 @@ void cxl_enable_prot_errors(struct device *dev)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_enable_prot_errors, "CXL");
 
+void cxl_disable_prot_errors(void *_dev) /* void * arg: registered as a devm action */
+{
+	struct device *dev = _dev;
+	struct pci_dev *pdev = to_pci_dev(dev); /* NOTE(review): assumes @_dev is a PCI device - confirm callers */
+	struct device *pci_dev __free(put_device) = get_device(&pdev->dev); /* ref dropped on scope exit */
+
+	if (!pci_dev || !pdev->aer_cap) /* no reference or no AER capability: nothing to mask */
+		return;
+
+	pci_aer_mask_internal_errors(pdev); /* set the AER internal error mask bits */
+}
+EXPORT_SYMBOL_NS_GPL(cxl_disable_prot_errors, "CXL");
+
 static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
 {
 	resource_size_t aer_phys;
@@ -166,6 +179,7 @@ static void cxl_uport_init_ras_reporting(struct cxl_port *port,
 
 	cxl_assign_error_handlers(&port->dev, &cxl_port_error_handlers);
 	cxl_enable_prot_errors(port->uport_dev);
+	devm_add_action_or_reset(host, cxl_disable_prot_errors, port->uport_dev);
 }
 
 /**
@@ -197,6 +211,7 @@ void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
 
 	cxl_assign_error_handlers(dport->dport_dev, &cxl_port_error_handlers);
 	cxl_enable_prot_errors(dport->dport_dev);
+	devm_add_action_or_reset(host, cxl_disable_prot_errors, dport->dport_dev);
 }
 EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");
 
@@ -223,7 +238,7 @@ static void cxl_endpoint_port_init_ras(struct cxl_port *port)
 	struct device *cxlmd_dev __free(put_device) = &cxlmd->dev;
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 
-	if (!dport || !dev_is_pci(dport->dport_dev)) {
+	if (!dport || !dev_is_pci(dport->dport_dev) || !dev_is_pci(cxlds->dev)) {
 		dev_err(&port->dev, "CXL port topology not found\n");
 		return;
 	}
@@ -232,6 +247,7 @@ static void cxl_endpoint_port_init_ras(struct cxl_port *port)
 
 	cxl_assign_error_handlers(cxlmd_dev, &cxl_ep_error_handlers);
 	cxl_enable_prot_errors(cxlds->dev);
+	devm_add_action_or_reset(cxlds->dev, cxl_disable_prot_errors, cxlds->dev);
 }
 
 #else
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index d3068f5cc767..d1ef0c676ff8 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -977,6 +977,31 @@ void pci_aer_unmask_internal_errors(struct pci_dev *dev)
 }
 EXPORT_SYMBOL_NS_GPL(pci_aer_unmask_internal_errors, "CXL");
 
+/**
+ * pci_aer_mask_internal_errors - mask internal errors
+ * @dev: pointer to the pci_dev data structure
+ *
+ * Sets the internal error bits (PCI_ERR_UNC_INTN, PCI_ERR_COR_INTERNAL)
+ * in the AER Uncorrectable and Correctable Error Mask registers.
+ *
+ * Note: AER must be enabled and supported by the device which must be
+ * checked in advance, e.g. with pcie_aer_is_native().
+ */
+void pci_aer_mask_internal_errors(struct pci_dev *dev)
+{
+	int aer = dev->aer_cap;
+	u32 mask;
+
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, &mask);
+	mask |= PCI_ERR_UNC_INTN;
+	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_MASK, mask);
+
+	pci_read_config_dword(dev, aer + PCI_ERR_COR_MASK, &mask);
+	mask |= PCI_ERR_COR_INTERNAL;
+	pci_write_config_dword(dev, aer + PCI_ERR_COR_MASK, mask);
+}
+EXPORT_SYMBOL_NS_GPL(pci_aer_mask_internal_errors, "CXL");
+
 static bool is_cxl_mem_dev(struct pci_dev *dev)
 {
 	/*
diff --git a/include/linux/aer.h b/include/linux/aer.h
index a65fe324fad2..f0c84db466e5 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -101,5 +101,6 @@ int cper_severity_to_aer(int cper_severity);
 void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
 		       int severity, struct aer_capability_regs *aer_regs);
 void pci_aer_unmask_internal_errors(struct pci_dev *dev);
+void pci_aer_mask_internal_errors(struct pci_dev *dev);
 #endif //_AER_H_
 
-- 
2.34.1


  parent reply	other threads:[~2025-03-27  1:50 UTC|newest]

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-27  1:47 [PATCH v8 00/16] Enable CXL PCIe port protocol error handling and logging Terry Bowman
2025-03-27  1:47 ` [PATCH v8 01/16] PCI/CXL: Introduce PCIe helper function pcie_is_cxl() Terry Bowman
2025-03-27 15:11   ` Ira Weiny
2025-03-27 15:30     ` Bowman, Terry
2025-03-27  1:47 ` [PATCH v8 02/16] PCI/AER: Modify AER driver logging to report CXL or PCIe bus error type Terry Bowman
2025-03-27 16:48   ` Bjorn Helgaas
2025-03-27 17:15     ` Bowman, Terry
2025-03-27 17:49       ` Bjorn Helgaas
2025-03-27 16:58   ` Ira Weiny
2025-03-27 17:17     ` Bowman, Terry
2025-03-27  1:47 ` [PATCH v8 03/16] CXL/AER: Introduce Kfifo for forwarding CXL errors Terry Bowman
2025-03-27 17:08   ` Bjorn Helgaas
2025-03-27 18:12     ` Bowman, Terry
2025-03-28 17:02       ` Bjorn Helgaas
2025-03-28 17:36         ` Bowman, Terry
2025-03-28 17:01   ` Ira Weiny
2025-04-07 13:43     ` Bowman, Terry
2025-04-04 16:53   ` Jonathan Cameron
2025-04-23 14:33   ` Jonathan Cameron
2025-04-23 15:04   ` Jonathan Cameron
2025-04-23 22:12   ` Gregory Price
2025-03-27  1:47 ` [PATCH v8 04/16] cxl/aer: AER service driver forwards CXL error to CXL driver Terry Bowman
2025-03-27 17:13   ` Bjorn Helgaas
2025-04-07 14:00     ` Bowman, Terry
2025-04-23 15:04   ` Jonathan Cameron
2025-04-24 14:17     ` Bowman, Terry
2025-04-25 13:18       ` Jonathan Cameron
2025-04-25 21:03         ` Bowman, Terry
2025-05-15 21:52         ` Bowman, Terry
2025-05-20 11:04           ` Jonathan Cameron
2025-05-20 13:21             ` Bowman, Terry
2025-05-21 18:34               ` Jonathan Cameron
2025-05-21 23:30                 ` Bowman, Terry
2025-04-23 22:21   ` Gregory Price
2025-03-27  1:47 ` [PATCH v8 05/16] PCI/AER: CXL driver dequeues CXL error forwarded from AER service driver Terry Bowman
2025-03-27  4:43   ` kernel test robot
2025-04-23 16:28   ` Jonathan Cameron
2025-04-24 15:03     ` Bowman, Terry
2025-03-27  1:47 ` [PATCH v8 06/16] CXL/PCI: Introduce CXL uncorrectable protocol error 'recovery' Terry Bowman
2025-03-27  3:37   ` kernel test robot
2025-03-27  4:19   ` kernel test robot
2025-04-23 16:35   ` Jonathan Cameron
2025-04-24 14:22     ` Bowman, Terry
2025-03-27  1:47 ` [PATCH v8 07/16] cxl/pci: Move existing CXL RAS initialization to CXL's cxl_port driver Terry Bowman
2025-04-17 10:18   ` Jonathan Cameron
2025-04-24 14:25     ` Bowman, Terry
2025-05-12 14:47     ` Bowman, Terry
2025-03-27  1:47 ` [PATCH v8 08/16] cxl/pci: Map CXL Endpoint Port and CXL Switch Port RAS registers Terry Bowman
2025-03-27  1:47 ` [PATCH v8 09/16] cxl/pci: Update RAS handler interfaces to also support CXL PCIe Ports Terry Bowman
2025-03-27  1:47 ` [PATCH v8 10/16] cxl/pci: Add log message if RAS registers are not mapped Terry Bowman
2025-04-23 16:41   ` Jonathan Cameron
2025-04-24 14:30     ` Bowman, Terry
2025-03-27  1:47 ` [PATCH v8 11/16] cxl/pci: Unifi CXL trace logging for CXL Endpoints and CXL Ports Terry Bowman
2025-04-23 16:44   ` Jonathan Cameron
2025-05-07 16:28     ` Shiju Jose
2025-05-07 18:30       ` Bowman, Terry
2025-03-27  1:47 ` [PATCH v8 12/16] cxl/pci: Assign CXL Port protocol error handlers Terry Bowman
2025-04-23 16:47   ` Jonathan Cameron
2025-03-27  1:47 ` [PATCH v8 13/16] cxl/pci: Assign CXL Endpoint " Terry Bowman
2025-03-27 19:46   ` kernel test robot
2025-04-23 16:49   ` Jonathan Cameron
2025-03-27  1:47 ` [PATCH v8 14/16] cxl/pci: Remove unnecessary CXL Endpoint handling helper functions Terry Bowman
2025-04-17 17:22   ` Jonathan Cameron
2025-03-27  1:47 ` [PATCH v8 15/16] CXL/PCI: Enable CXL protocol errors during CXL Port probe Terry Bowman
2025-04-04 17:05   ` Jonathan Cameron
2025-04-07 14:34     ` Bowman, Terry
2025-03-27  1:47 ` Terry Bowman [this message]
2025-03-28  1:18   ` [PATCH v8 16/16] CXL/PCI: Disable CXL protocol errors during CXL Port cleanup kernel test robot
2025-04-04 17:04   ` Jonathan Cameron
2025-04-07 14:25     ` Bowman, Terry
2025-04-17 10:13       ` Jonathan Cameron
2025-04-24 16:37         ` Bowman, Terry
2025-03-27 17:16 ` [PATCH v8 00/16] Enable CXL PCIe port protocol error handling and logging Bjorn Helgaas
2025-03-27 22:04   ` Bowman, Terry
2025-05-06 23:06 ` Gregory Price
2025-05-07 18:28   ` Bowman, Terry

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250327014717.2988633-17-terry.bowman@amd.com \
    --to=terry.bowman@amd.com \
    --cc=Benjamin.Cheatham@amd.com \
    --cc=PradeepVineshReddy.Kodamati@amd.com \
    --cc=Smita.KoralahalliChannabasappa@amd.com \
    --cc=alison.schofield@intel.com \
    --cc=bhelgaas@google.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=ira.weiny@intel.com \
    --cc=jonathan.cameron@huawei.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=lukas@wunner.de \
    --cc=mahesh@linux.ibm.com \
    --cc=ming.li@zohomail.com \
    --cc=nathan.fontenot@amd.com \
    --cc=nifan.cxl@gmail.com \
    --cc=oohall@gmail.com \
    --cc=rrichter@amd.com \
    --cc=vishal.l.verma@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox