Linux PCI subsystem development
 help / color / mirror / Atom feed
From: Dave Jiang <dave.jiang@intel.com>
To: linux-cxl@vger.kernel.org, linux-pci@vger.kernel.org
Cc: terry.bowman@amd.com, bhelgaas@google.com, jic23@kernel.org,
	djbw@kernel.org
Subject: [RFC PATCH v2 2/2] PCI/CXL: Enable usage of RDPAS to shortcut error device discovery
Date: Thu, 18 Jun 2026 10:07:22 -0700	[thread overview]
Message-ID: <20260618170723.2010490-3-dave.jiang@intel.com> (raw)
In-Reply-To: <20260618170723.2010490-1-dave.jiang@intel.com>

The RDPAS allows the CXL RCH error handler to find the device directly
instead of iterating through a set number of RCiEPs in order to discover
which device triggered an error. For the CXL.io protocol, the base
address provided from the cxl_rdpas xarray points to the RCRB of the
device. The RCRB mirrors the configuration space of the device via MMIO.
The error handler can walk the RCRB to find the AER capability block and
therefore read the root status as well as the error source in order
to determine the BDF of the error device.

Entries with the CXL.cachemem protocol are ignored because the base address
provided by the RDPAS structure points to the Component Base Register Base
and does not provide a way for the code to identify the device that
triggered the error.

Change the current RCH error handler behavior so it will probe the
RCRB first to see if the error device can be discovered quickly
before falling back to the current method of iterating through RCiEPs.

Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
v2:
- Add boundary checks for MMIO reads (sashiko)
- Add checks for surprise removal of devices (sashiko)
- Use aer_info to also check severity. (Ming)
- Update to iterate list of RPs under a RCEC entry.
---
 drivers/pci/pcie/aer_cxl_rch.c | 152 ++++++++++++++++++++++++++++++++-
 1 file changed, 148 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/pcie/aer_cxl_rch.c b/drivers/pci/pcie/aer_cxl_rch.c
index eaab7698217e..f295e4eefbba 100644
--- a/drivers/pci/pcie/aer_cxl_rch.c
+++ b/drivers/pci/pcie/aer_cxl_rch.c
@@ -118,7 +118,7 @@ int cxl_rdpas_init(struct device *host)
 }
 EXPORT_SYMBOL_FOR_MODULES(cxl_rdpas_init, "cxl_acpi");
 
-static struct cxl_rdpas_rcec __maybe_unused *cxl_get_rdpas_by_rcec(struct pci_dev *rcec)
+static struct cxl_rdpas_rcec *cxl_get_rdpas_by_rcec(struct pci_dev *rcec)
 {
 	unsigned long index;
 
@@ -166,6 +166,143 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
 	return 0;
 }
 
+static u16 rcrb_to_aer(void __iomem *rcrb)
+{
+	/*
+	 * Walk the extended capability list in the RCRB and return the AER
+	 * capability offset, or 0 on failure. Capability headers are dword
+	 * aligned within SZ_4K, so the chain can hold at most SZ_4K / 4
+	 * entries; bound the walk by that count to avoid spinning on a
+	 * malformed, looping capability list.
+	 */
+	int entries = SZ_4K / 4;
+	u16 offset;
+	u32 cap_hdr;
+
+	/* Start from PCIe extended capabilities at offset 0x100 */
+	offset = PCI_CFG_SPACE_SIZE;
+	cap_hdr = readl(rcrb + offset);
+	if (cap_hdr == 0 || PCI_POSSIBLE_ERROR(cap_hdr))
+		return 0;
+
+	while (PCI_EXT_CAP_ID(cap_hdr) != PCI_EXT_CAP_ID_ERR) {
+		if (--entries <= 0)
+			return 0;
+
+		offset = PCI_EXT_CAP_NEXT(cap_hdr);
+		if (!offset)
+			return 0;
+
+		if (offset >= SZ_4K)
+			return 0;
+
+		cap_hdr = readl(rcrb + offset);
+		if (cap_hdr == 0 || PCI_POSSIBLE_ERROR(cap_hdr))
+			return 0;
+	}
+
+	return offset;
+}
+
+DEFINE_FREE(iounmap, void __iomem *, if (_T) iounmap(_T))
+static u16 cxl_rch_get_err_src_id(u64 rcrb_base, struct aer_err_info *info)
+{
+	u32 root_status, err_src;
+	void __iomem *aer_base;
+	u16 aer_offset;
+
+	void __iomem *rcrb __free(iounmap) = ioremap(rcrb_base, SZ_4K);
+	if (!rcrb)
+		return 0;
+
+	aer_offset = rcrb_to_aer(rcrb);
+	if (!aer_offset)
+		return 0;
+
+	aer_base = rcrb + aer_offset;
+	if (aer_offset + PCI_ERR_ROOT_STATUS + sizeof(u32) > SZ_4K)
+		return 0;
+
+	root_status = readl(aer_base + PCI_ERR_ROOT_STATUS);
+	if (!(root_status & (PCI_ERR_ROOT_COR_RCV | PCI_ERR_ROOT_UNCOR_RCV)))
+		return 0;
+
+	if (aer_offset + PCI_ERR_ROOT_ERR_SRC + sizeof(u32) > SZ_4K)
+		return 0;
+
+	err_src = readl(aer_base + PCI_ERR_ROOT_ERR_SRC);
+
+	if (info->severity == AER_CORRECTABLE &&
+	    root_status & PCI_ERR_ROOT_COR_RCV)
+		return FIELD_GET(GENMASK(15, 0), err_src);
+
+	/* Assumes severity is UNCOR here; NOTE(review): not actually checked */
+	if (root_status & PCI_ERR_ROOT_UNCOR_RCV)
+		return FIELD_GET(GENMASK(31, 16), err_src);
+
+	return 0;
+}
+
+static bool cxl_rch_forward_error_by_dsp(struct pci_dev *rcec, u64 rcrb_base,
+					 struct aer_err_info *info)
+{
+	u8 bus, devfn;
+	u16 segment;
+	u16 src_id;
+
+	src_id = cxl_rch_get_err_src_id(rcrb_base, info);
+	if (!src_id)
+		return false;
+
+	/* Split the source ID into bus/devfn; the segment comes from the RCEC */
+	segment = pci_domain_nr(rcec->bus);
+	bus = FIELD_GET(GENMASK(15, 8), src_id);
+	devfn = FIELD_GET(GENMASK(7, 0), src_id);
+
+	struct pci_dev *pdev __free(pci_dev_put) =
+		pci_get_domain_bus_and_slot(segment, bus, devfn);
+	if (!pdev)
+		return false;
+
+	/*
+	 * The error source id resolves to whatever BDF the root port logged,
+	 * which is not guaranteed to be a natively handled CXL.mem device.
+	 * Apply the same gating as the RCiEP walk fallback before forwarding.
+	 */
+	if (!is_cxl_mem_dev(pdev) || !cxl_error_is_native(pdev))
+		return false;
+
+	cxl_forward_error(pdev, info);
+	return true;
+}
+
+static bool cxl_rch_handled_error_by_rdpas(struct pci_dev *rcec,
+					   struct aer_err_info *info)
+{
+	struct cxl_rdpas_rcec *rdpas_rcec;
+	struct cxl_rdpas_entry *entry;
+	bool handled = false;
+
+	rdpas_rcec = cxl_get_rdpas_by_rcec(rcec);
+	if (!rdpas_rcec)
+		return false;
+
+	/*
+	 * The RCEC aggregates multiple downstream ports. Each CXL.io
+	 * downstream port associated with this RCEC exposes the RCRB at its
+	 * base address; walk them all, skipping non-CXL.io entries, and
+	 * report handled if any port forwarded an error to its source device.
+	 */
+	list_for_each_entry(entry, &rdpas_rcec->ports, list) {
+		if (entry->protocol != ACPI_CEDT_RDPAS_PROTOCOL_IO)
+			continue;
+
+		if (cxl_rch_forward_error_by_dsp(rcec, entry->address, info))
+			handled = true;
+	}
+
+	return handled;
+}
+
 void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
 {
 	/*
@@ -173,9 +310,16 @@ void cxl_rch_handle_error(struct pci_dev *dev, struct aer_err_info *info)
 	 * RCH's downstream port. Check and handle them in the CXL.mem
 	 * device driver.
 	 */
-	if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
-	    is_aer_internal_error(info))
-		pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
+	if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_EC)
+		return;
+
+	if (!is_aer_internal_error(info))
+		return;
+
+	if (cxl_rch_handled_error_by_rdpas(dev, info))
+		return;
+
+	pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
 }
 
 static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
-- 
2.54.0


  parent reply	other threads:[~2026-06-18 17:07 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-18 17:07 [RFC PATCH v2 0/2] PCI/CXL: Add RDPAS support for CXL.io Dave Jiang
2026-06-18 17:07 ` [RFC PATCH v2 1/2] PCI/CXL: Add RDPAS parsing support Dave Jiang
2026-06-18 17:19   ` sashiko-bot
2026-06-18 17:07 ` Dave Jiang [this message]
2026-06-18 17:20   ` [RFC PATCH v2 2/2] PCI/CXL: Enable usage of RDPAS to shortcut error device discovery sashiko-bot
2026-06-18 19:05 ` [RFC PATCH v2 0/2] PCI/CXL: Add RDPAS support for CXL.io Bowman, Terry
2026-06-18 20:12   ` Dave Jiang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260618170723.2010490-3-dave.jiang@intel.com \
    --to=dave.jiang@intel.com \
    --cc=bhelgaas@google.com \
    --cc=djbw@kernel.org \
    --cc=jic23@kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=terry.bowman@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox