From: Terry Bowman <terry.bowman@amd.com>
To: <dave@stgolabs.net>, <jonathan.cameron@huawei.com>,
<dave.jiang@intel.com>, <alison.schofield@intel.com>,
<dan.j.williams@intel.com>, <bhelgaas@google.com>,
<shiju.jose@huawei.com>, <ming.li@zohomail.com>,
<Smita.KoralahalliChannabasappa@amd.com>, <rrichter@amd.com>,
<dan.carpenter@linaro.org>, <PradeepVineshReddy.Kodamati@amd.com>,
<lukas@wunner.de>, <Benjamin.Cheatham@amd.com>,
<sathyanarayanan.kuppuswamy@linux.intel.com>,
<linux-cxl@vger.kernel.org>, <alucerop@amd.com>,
<ira.weiny@intel.com>
Cc: <linux-kernel@vger.kernel.org>, <linux-pci@vger.kernel.org>,
<terry.bowman@amd.com>
Subject: [PATCH v12 21/25] CXL/PCI: Introduce CXL Port protocol error handlers
Date: Thu, 25 Sep 2025 17:34:36 -0500 [thread overview]
Message-ID: <20250925223440.3539069-22-terry.bowman@amd.com> (raw)
In-Reply-To: <20250925223440.3539069-1-terry.bowman@amd.com>
Introduce CXL error handlers for CXL Port devices.
Add functions cxl_port_cor_error_detected() and cxl_port_error_detected().
These will serve as the handlers for all CXL Port devices. Introduce
cxl_get_ras_base() to provide the RAS base address needed by the handlers.
Update cxl_handle_proto_error() to call the CXL Port or CXL Endpoint
handler depending on which CXL device reports the error.
Implement cxl_get_ras_base() to return the cached RAS register address of a
CXL Root Port, CXL Downstream Port, or CXL Upstream Port.
Introduce get_pci_cxl_host_dev() to return the host responsible for
releasing the RAS mapped resources. CXL endpoints do not use a host to
manage its resources, allow for NULL in the case of an EP. Use reference
count increment on the host to prevent resource release. Make the caller
responsible for the reference decrement.
Update the AER driver's is_cxl_error() PCI type check because CXL Port
devices are now supported.
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
---
Changes in v11->v12:
- Add call to cxl_pci_drv_bound() in cxl_handle_proto_error() and
pci_to_cxl_dev()
- Change cxl_error_detected() -> cxl_cor_error_detected()
- Remove NULL variable assignments
- Replace bus_find_device() with find_cxl_port_by_uport() for upstream
port searches.
Changes in v10->v11:
- None
---
drivers/cxl/core/core.h | 10 +++
drivers/cxl/core/port.c | 7 +-
drivers/cxl/core/ras.c | 159 ++++++++++++++++++++++++++++++++--
drivers/pci/pcie/aer_cxl_vh.c | 5 +-
4 files changed, 170 insertions(+), 11 deletions(-)
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 9ceff8acf844..3197a71bf7b8 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -156,6 +156,8 @@ pci_ers_result_t pci_error_detected(struct pci_dev *pdev,
void pci_cor_error_detected(struct pci_dev *pdev);
void cxl_cor_error_detected(struct device *dev);
pci_ers_result_t cxl_error_detected(struct device *dev);
+void cxl_port_cor_error_detected(struct device *dev);
+pci_ers_result_t cxl_port_error_detected(struct device *dev);
#else
static inline int cxl_ras_init(void)
{
@@ -180,9 +182,17 @@ static inline pci_ers_result_t cxl_error_detected(struct device *dev)
{
return PCI_ERS_RESULT_NONE;
}
+static inline void cxl_port_cor_error_detected(struct device *dev) { }
+static inline pci_ers_result_t cxl_port_error_detected(struct device *dev)
+{
+ return PCI_ERS_RESULT_NONE;
+}
#endif // CONFIG_CXL_RAS
int cxl_gpf_port_setup(struct cxl_dport *dport);
+struct cxl_port *find_cxl_port(struct device *dport_dev,
+ struct cxl_dport **dport);
+struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev);
struct cxl_hdm;
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 56fa4ac33e8b..f34a44abb2c9 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -1357,8 +1357,8 @@ static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx)
return NULL;
}
-static struct cxl_port *find_cxl_port(struct device *dport_dev,
- struct cxl_dport **dport)
+struct cxl_port *find_cxl_port(struct device *dport_dev,
+ struct cxl_dport **dport)
{
struct cxl_find_port_ctx ctx = {
.dport_dev = dport_dev,
@@ -1561,7 +1561,7 @@ static int match_port_by_uport(struct device *dev, const void *data)
* Function takes a device reference on the port device. Caller should do a
* put_device() when done.
*/
-static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
+struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
{
struct device *dev;
@@ -1570,6 +1570,7 @@ static struct cxl_port *find_cxl_port_by_uport(struct device *uport_dev)
return to_cxl_port(dev);
return NULL;
}
+EXPORT_SYMBOL_NS_GPL(find_cxl_port_by_uport, "CXL");
static int update_decoder_targets(struct device *dev, void *data)
{
diff --git a/drivers/cxl/core/ras.c b/drivers/cxl/core/ras.c
index 9acfe24ba3bb..7e8d63c32d72 100644
--- a/drivers/cxl/core/ras.c
+++ b/drivers/cxl/core/ras.c
@@ -250,6 +250,129 @@ static void cxl_dport_map_ras(struct cxl_dport *dport)
dev_dbg(dev, "Failed to map RAS capability.\n");
}
+static void __iomem *cxl_get_ras_base(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ switch (pci_pcie_type(pdev)) {
+ case PCI_EXP_TYPE_ROOT_PORT:
+ case PCI_EXP_TYPE_DOWNSTREAM:
+ {
+ struct cxl_dport *dport;
+ struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
+
+ if (!dport || !dport->dport_dev) {
+ pci_err(pdev, "Failed to find the CXL device");
+ return NULL;
+ }
+
+ return dport->regs.ras;
+ }
+ case PCI_EXP_TYPE_UPSTREAM:
+ {
+ struct cxl_port *port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
+
+ if (!port) {
+ pci_err(pdev, "Failed to find the CXL device");
+ return NULL;
+ }
+
+ return port->uport_regs.ras;
+ }
+ }
+
+ dev_warn_once(dev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev));
+ return NULL;
+}
+
+static struct device *pci_to_cxl_dev(struct pci_dev *pdev)
+{
+ switch (pci_pcie_type(pdev)) {
+ case PCI_EXP_TYPE_ROOT_PORT:
+ case PCI_EXP_TYPE_DOWNSTREAM:
+ {
+ struct cxl_dport *dport;
+ struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
+
+ if (!port) {
+ pci_err(pdev, "Failed to find the CXL device");
+ return NULL;
+ }
+
+ return dport->dport_dev;
+ }
+ case PCI_EXP_TYPE_UPSTREAM:
+ {
+ struct cxl_port *port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
+
+ if (!port) {
+ pci_err(pdev, "Failed to find the CXL device");
+ return NULL;
+ }
+
+ return port->uport_dev;
+ }
+ case PCI_EXP_TYPE_ENDPOINT:
+ {
+ struct cxl_dev_state *cxlds;
+
+ if (!cxl_pci_drv_bound(pdev))
+ return NULL;
+
+ cxlds = pci_get_drvdata(pdev);
+ return cxlds->dev;
+ }
+ }
+
+ pci_warn_once(pdev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev));
+ return NULL;
+}
+
+/*
+ * Return 'struct device *' responsible for freeing pdev's CXL resources.
+ * Caller is responsible for reference count decrementing the return
+ * 'struct device *'.
+ *
+ * dev: Find the host of this dev
+ */
+static struct device *get_cxl_host_dev(struct device *dev)
+{
+ struct pci_dev *pdev = to_pci_dev(dev);
+
+ switch (pci_pcie_type(pdev)) {
+ case PCI_EXP_TYPE_ROOT_PORT:
+ case PCI_EXP_TYPE_DOWNSTREAM:
+ {
+ struct cxl_dport *dport;
+ struct cxl_port *port __free(put_cxl_port) = find_cxl_port(&pdev->dev, &dport);
+
+ if (!port) {
+ pci_err(pdev, "Failed to find the CXL device");
+ return NULL;
+ }
+
+ return &port->dev;
+ }
+ case PCI_EXP_TYPE_UPSTREAM:
+ {
+ struct cxl_port *port __free(put_cxl_port) = find_cxl_port_by_uport(&pdev->dev);
+
+ if (!port) {
+ pci_err(pdev, "Failed to find the CXL device");
+ return NULL;
+ }
+
+ return &port->dev;
+ }
+ /* Endpoint resources are managed by endpoint itself */
+ case PCI_EXP_TYPE_ENDPOINT:
+ return NULL;
+ }
+
+ dev_warn_once(dev, "Error: Unsupported device type (%X)", pci_pcie_type(pdev));
+ return NULL;
+}
+
/**
* cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
* @dport: the cxl_dport that needs to be initialized
@@ -399,6 +522,22 @@ static pci_ers_result_t cxl_handle_ras(struct device *dev, u64 serial, void __io
return PCI_ERS_RESULT_PANIC;
}
+void cxl_port_cor_error_detected(struct device *dev)
+{
+ void __iomem *ras_base = cxl_get_ras_base(dev);
+
+ cxl_handle_cor_ras(dev, 0, ras_base);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_cor_error_detected, "CXL");
+
+pci_ers_result_t cxl_port_error_detected(struct device *dev)
+{
+ void __iomem *ras_base = cxl_get_ras_base(dev);
+
+ return cxl_handle_ras(dev, 0, ras_base);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_error_detected, "CXL");
+
void cxl_cor_error_detected(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
@@ -469,9 +608,8 @@ EXPORT_SYMBOL_NS_GPL(pci_error_detected, "CXL");
static void cxl_handle_proto_error(struct cxl_proto_err_work_data *err_info)
{
struct pci_dev *pdev = err_info->pdev;
- struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
- struct cxl_memdev *cxlmd = cxlds->cxlmd;
- struct device *host_dev __free(put_device) = get_device(&cxlmd->dev);
+ struct device *dev = pci_to_cxl_dev(pdev);
+ struct device *host_dev __free(put_device) = get_cxl_host_dev(&pdev->dev);
if (err_info->severity == AER_CORRECTABLE) {
int aer = pdev->aer_cap;
@@ -483,13 +621,20 @@ static void cxl_handle_proto_error(struct cxl_proto_err_work_data *err_info)
aer + PCI_ERR_COR_STATUS,
0, PCI_ERR_COR_INTERNAL);
- if (!cxl_pci_drv_bound(pdev))
- return;
+ if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT) {
+
+ if (!cxl_pci_drv_bound(pdev))
+ return;
+
+ cxl_cor_error_detected(dev);
+
+ } else {
+ cxl_port_cor_error_detected(dev);
+ }
- cxl_cor_error_detected(&cxlmd->dev);
pcie_clear_device_status(pdev);
} else {
- cxl_do_recovery(&cxlmd->dev);
+ cxl_do_recovery(dev);
}
}
diff --git a/drivers/pci/pcie/aer_cxl_vh.c b/drivers/pci/pcie/aer_cxl_vh.c
index 8c0979299446..dbd7cb5d1a0a 100644
--- a/drivers/pci/pcie/aer_cxl_vh.c
+++ b/drivers/pci/pcie/aer_cxl_vh.c
@@ -43,7 +43,10 @@ bool is_cxl_error(struct pci_dev *pdev, struct aer_err_info *info)
if (!info || !info->is_cxl)
return false;
- if (pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT)
+ if ((pci_pcie_type(pdev) != PCI_EXP_TYPE_ENDPOINT) &&
+ (pci_pcie_type(pdev) != PCI_EXP_TYPE_ROOT_PORT) &&
+ (pci_pcie_type(pdev) != PCI_EXP_TYPE_UPSTREAM) &&
+ (pci_pcie_type(pdev) != PCI_EXP_TYPE_DOWNSTREAM))
return false;
return is_internal_error(info);
--
2.34.1
next prev parent reply other threads:[~2025-09-25 22:38 UTC|newest]
Thread overview: 92+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-09-25 22:34 [PATCH v12 00/25] Enable CXL PCIe Port Protocol Error handling and logging Terry Bowman
2025-09-25 22:34 ` [PATCH v12 01/25] cxl/pci: Remove unnecessary CXL Endpoint handling helper functions Terry Bowman
2025-09-25 22:34 ` [PATCH v12 02/25] cxl/pci: Remove unnecessary CXL RCH " Terry Bowman
2025-10-01 15:09 ` Jonathan Cameron
2025-09-25 22:34 ` [PATCH v12 03/25] cxl: Remove ifdef blocks of CONFIG_PCIEAER_CXL from core/pci.c Terry Bowman
2025-10-03 20:11 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 04/25] CXL/AER: Remove CONFIG_PCIEAER_CXL and replace with CONFIG_CXL_RAS Terry Bowman
2025-09-25 23:17 ` Dave Jiang
2025-10-01 15:11 ` Jonathan Cameron
2025-10-03 20:11 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 05/25] cxl: Move CXL driver RCH error handling into CONFIG_CXL_RCH_RAS conditional block Terry Bowman
2025-09-25 23:31 ` Dave Jiang
2025-10-01 15:23 ` Jonathan Cameron
2025-10-03 20:11 ` Cheatham, Benjamin
2025-10-06 18:52 ` Bowman, Terry
2025-09-25 22:34 ` [PATCH v12 06/25] CXL/AER: Introduce aer_cxl_rch.c into AER driver for handling CXL RCH errors Terry Bowman
2025-09-25 23:36 ` Dave Jiang
2025-09-26 12:32 ` kernel test robot
2025-10-01 15:42 ` Jonathan Cameron
2025-10-03 20:11 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 07/25] CXL/PCI: Move CXL DVSEC definitions into uapi/linux/pci_regs.h Terry Bowman
2025-09-25 23:53 ` Dave Jiang
2025-10-01 15:58 ` Jonathan Cameron
2025-10-02 15:25 ` Bowman, Terry
2025-10-03 20:11 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 08/25] PCI/CXL: Introduce pcie_is_cxl() Terry Bowman
2025-10-03 20:11 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 09/25] PCI/AER: Report CXL or PCIe bus error type in trace logging Terry Bowman
2025-10-03 20:11 ` Cheatham, Benjamin
2025-10-06 19:59 ` Bowman, Terry
2025-09-25 22:34 ` [PATCH v12 10/25] CXL/AER: Update PCI class code check to use FIELD_GET() Terry Bowman
2025-09-26 0:02 ` Dave Jiang
2025-10-01 16:12 ` Jonathan Cameron
2025-10-02 7:40 ` Lukas Wunner
2025-10-30 17:16 ` Bowman, Terry
2025-10-31 5:30 ` Lukas Wunner
2025-09-25 22:34 ` [PATCH v12 11/25] cxl/pci: Update RAS handler interfaces to also support CXL Ports Terry Bowman
2025-10-03 20:11 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 12/25] cxl/pci: Log message if RAS registers are unmapped Terry Bowman
2025-10-03 20:11 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 13/25] cxl/pci: Unify CXL trace logging for CXL Endpoints and CXL Ports Terry Bowman
2025-09-26 20:44 ` Dave Jiang
2025-09-25 22:34 ` [PATCH v12 14/25] cxl/pci: Update cxl_handle_cor_ras() to return early if no RAS errors Terry Bowman
2025-10-03 20:11 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 15/25] cxl/pci: Map CXL Endpoint Port and CXL Switch Port RAS registers Terry Bowman
2025-09-26 21:10 ` Dave Jiang
2025-10-24 10:25 ` Alejandro Lucero Palau
2025-10-24 17:15 ` Dave Jiang
2025-10-24 19:40 ` Bowman, Terry
2025-10-27 16:33 ` Alejandro Lucero Palau
2025-09-25 22:34 ` [PATCH v12 16/25] CXL/PCI: Introduce PCI_ERS_RESULT_PANIC Terry Bowman
2025-09-26 21:26 ` Dave Jiang
2025-10-01 16:14 ` Jonathan Cameron
2025-10-03 20:11 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 17/25] cxl/pci: Introduce CXL Endpoint protocol error handlers Terry Bowman
2025-09-26 22:04 ` Dave Jiang
2025-09-30 14:06 ` Bowman, Terry
2025-09-30 16:09 ` Dave Jiang
2025-10-03 20:12 ` Cheatham, Benjamin
2025-10-06 21:07 ` Bowman, Terry
2025-09-25 22:34 ` [PATCH v12 18/25] CXL/AER: Introduce aer_cxl_vh.c in AER driver for forwarding CXL errors Terry Bowman
2025-09-26 22:56 ` Dave Jiang
2025-10-03 20:12 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 19/25] cxl: Introduce cxl_pci_drv_bound() to check for bound driver Terry Bowman
2025-09-26 23:02 ` Dave Jiang
2025-10-02 12:27 ` Jonathan Cameron
2025-10-03 20:12 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 20/25] PCI/AER: Dequeue forwarded CXL error Terry Bowman
2025-09-26 23:26 ` Dave Jiang
2025-10-03 20:12 ` Cheatham, Benjamin
2025-10-06 20:17 ` Dave Jiang
2025-09-25 22:34 ` Terry Bowman [this message]
2025-09-29 23:32 ` [PATCH v12 21/25] CXL/PCI: Introduce CXL Port protocol error handlers Dave Jiang
2025-10-03 20:12 ` Cheatham, Benjamin
2025-10-06 21:28 ` Bowman, Terry
2025-09-25 22:34 ` [PATCH v12 22/25] CXL/PCI: Export and rename merge_result() to pci_ers_merge_result() Terry Bowman
2025-09-26 15:01 ` kernel test robot
2025-09-26 18:10 ` kernel test robot
2025-09-25 22:34 ` [PATCH v12 23/25] CXL/PCI: Introduce CXL uncorrectable protocol error recovery Terry Bowman
2025-09-30 0:26 ` Dave Jiang
2025-09-30 14:38 ` Bowman, Terry
2025-09-30 16:13 ` Dave Jiang
2025-09-30 16:43 ` Bowman, Terry
2025-09-30 16:46 ` Dave Jiang
2025-10-01 13:58 ` Bowman, Terry
2025-10-01 15:33 ` Dave Jiang
2025-10-03 20:12 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 24/25] CXL/PCI: Enable CXL protocol errors during CXL Port probe Terry Bowman
2025-09-30 0:28 ` Dave Jiang
2025-10-03 20:12 ` Cheatham, Benjamin
2025-09-25 22:34 ` [PATCH v12 25/25] CXL/PCI: Disable CXL protocol error interrupts during CXL Port cleanup Terry Bowman
2025-10-03 20:12 ` Cheatham, Benjamin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250925223440.3539069-22-terry.bowman@amd.com \
--to=terry.bowman@amd.com \
--cc=Benjamin.Cheatham@amd.com \
--cc=PradeepVineshReddy.Kodamati@amd.com \
--cc=Smita.KoralahalliChannabasappa@amd.com \
--cc=alison.schofield@intel.com \
--cc=alucerop@amd.com \
--cc=bhelgaas@google.com \
--cc=dan.carpenter@linaro.org \
--cc=dan.j.williams@intel.com \
--cc=dave.jiang@intel.com \
--cc=dave@stgolabs.net \
--cc=ira.weiny@intel.com \
--cc=jonathan.cameron@huawei.com \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=lukas@wunner.de \
--cc=ming.li@zohomail.com \
--cc=rrichter@amd.com \
--cc=sathyanarayanan.kuppuswamy@linux.intel.com \
--cc=shiju.jose@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox