From: Terry Bowman <terry.bowman@amd.com>
To: <ming4.li@intel.com>, <linux-cxl@vger.kernel.org>,
<linux-kernel@vger.kernel.org>, <linux-pci@vger.kernel.org>,
<dave@stgolabs.net>, <jonathan.cameron@huawei.com>,
<dave.jiang@intel.com>, <alison.schofield@intel.com>,
<vishal.l.verma@intel.com>, <dan.j.williams@intel.com>,
<bhelgaas@google.com>, <mahesh@linux.ibm.com>, <oohall@gmail.com>,
<Benjamin.Cheatham@amd.com>, <rrichter@amd.com>,
<nathan.fontenot@amd.com>,
<smita.koralahallichannabasappa@amd.com>, <terry.bowman@amd.com>
Subject: [PATCH 04/15] cxl/aer/pci: Add CXL PCIe port correctable error support in AER service driver
Date: Tue, 8 Oct 2024 17:16:46 -0500 [thread overview]
Message-ID: <20241008221657.1130181-5-terry.bowman@amd.com> (raw)
In-Reply-To: <20241008221657.1130181-1-terry.bowman@amd.com>
The AER service driver currently does not manage CXL PCIe port
protocol errors reported by CXL root ports, CXL upstream switch ports,
and CXL downstream switch ports. Consequently, RAS protocol errors
from CXL PCIe port devices are not properly logged or handled.
These errors are reported to the OS via the root port's AER correctable
and uncorrectable internal error fields. While the AER driver supports
handling downstream port protocol errors in restricted CXL host (RCH)
mode, also known as CXL1.1, it lacks the same functionality for CXL
PCIe ports operating in virtual hierarchy (VH) mode, introduced in
CXL2.0.
To address this gap, update the AER driver to handle CXL PCIe port
device protocol correctable errors (CE).
Uncorrectable error (UCE) handling will be added in a future
patch.
Make this update alongside the existing downstream port RCH error
handling logic, extending support to CXL PCIe ports in VH.
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
---
drivers/pci/pcie/aer.c | 54 +++++++++++++++++++++++++++++++++---------
1 file changed, 43 insertions(+), 11 deletions(-)
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index dc8b17999001..1c996287d4ce 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -40,6 +40,8 @@
#define AER_MAX_TYPEOF_COR_ERRS 16 /* as per PCI_ERR_COR_STATUS */
#define AER_MAX_TYPEOF_UNCOR_ERRS 27 /* as per PCI_ERR_UNCOR_STATUS*/
+#define CXL_DVSEC_PORT_EXTENSIONS 3
+
struct aer_err_source {
u32 status; /* PCI_ERR_ROOT_STATUS */
u32 id; /* PCI_ERR_ROOT_ERR_SRC */
@@ -941,6 +943,17 @@ static bool find_source_device(struct pci_dev *parent,
return true;
}
+static bool is_pcie_cxl_port(struct pci_dev *dev)
+{
+ if ((pci_pcie_type(dev) != PCI_EXP_TYPE_ROOT_PORT) &&
+ (pci_pcie_type(dev) != PCI_EXP_TYPE_UPSTREAM) &&
+ (pci_pcie_type(dev) != PCI_EXP_TYPE_DOWNSTREAM))
+ return false;
+
+ return (!!pci_find_dvsec_capability(dev, PCI_VENDOR_ID_CXL,
+ CXL_DVSEC_PORT_EXTENSIONS));
+}
+
static bool is_internal_error(struct aer_err_info *info)
{
if (info->severity == AER_CORRECTABLE)
@@ -1032,14 +1045,22 @@ static int cxl_rch_handle_error_iter(struct pci_dev *dev, void *data)
static void cxl_handle_error(struct pci_dev *dev, struct aer_err_info *info)
{
- /*
- * Internal errors of an RCEC indicate an AER error in an
- * RCH's downstream port. Check and handle them in the CXL.mem
- * device driver.
- */
- if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
- is_internal_error(info))
+ if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC)
pcie_walk_rcec(dev, cxl_rch_handle_error_iter, info);
+
+ if (info->severity == AER_CORRECTABLE) {
+ struct cxl_port_err_hndlrs *cxl_port_hndlrs =
+ find_cxl_port_hndlrs();
+ int aer = dev->aer_cap;
+
+ if (aer)
+ pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
+ info->status);
+
+ if (cxl_port_hndlrs && cxl_port_hndlrs->cor_error_detected)
+ cxl_port_hndlrs->cor_error_detected(dev);
+ pcie_clear_device_status(dev);
+ }
}
static int handles_cxl_error_iter(struct pci_dev *dev, void *data)
@@ -1057,9 +1078,13 @@ static bool handles_cxl_errors(struct pci_dev *dev)
{
bool handles_cxl = false;
- if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC &&
- pcie_aer_is_native(dev))
+ if (!pcie_aer_is_native(dev))
+ return false;
+
+ if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC)
pcie_walk_rcec(dev, handles_cxl_error_iter, &handles_cxl);
+ else
+ handles_cxl = is_pcie_cxl_port(dev);
return handles_cxl;
}
@@ -1077,6 +1102,10 @@ static void cxl_enable_internal_errors(struct pci_dev *dev)
static inline void cxl_enable_internal_errors(struct pci_dev *dev) { }
static inline void cxl_handle_error(struct pci_dev *dev,
struct aer_err_info *info) { }
+static bool handles_cxl_errors(struct pci_dev *dev)
+{
+ return false;
+}
#endif
void register_cxl_port_hndlrs(struct cxl_port_err_hndlrs *_cxl_port_hndlrs)
@@ -1134,8 +1163,11 @@ static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info)
static void handle_error_source(struct pci_dev *dev, struct aer_err_info *info)
{
- cxl_handle_error(dev, info);
- pci_aer_handle_error(dev, info);
+ if (is_internal_error(info) && handles_cxl_errors(dev))
+ cxl_handle_error(dev, info);
+ else
+ pci_aer_handle_error(dev, info);
+
pci_dev_put(dev);
}
--
2.34.1
next prev parent reply other threads:[~2024-10-08 22:17 UTC|newest]
Thread overview: 62+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-08 22:16 [PATCH 0/15] Enable CXL PCIe port protocol error handling and logging Terry Bowman
2024-10-08 22:16 ` [PATCH 01/15] cxl/aer/pci: Add CXL PCIe port error handler callbacks in AER service driver Terry Bowman
2024-10-22 1:53 ` Dan Williams
2024-10-22 13:50 ` Terry Bowman
2024-10-22 17:09 ` Dan Williams
2024-10-22 18:40 ` Terry Bowman
2024-10-22 23:43 ` Dan Williams
2024-10-24 15:20 ` Bowman, Terry
2024-10-24 19:10 ` Dan Williams
2024-10-08 22:16 ` [PATCH 02/15] cxl/aer/pci: Update is_internal_error() to be callable w/o CONFIG_PCIEAER_CXL Terry Bowman
2024-10-16 16:11 ` Jonathan Cameron
2024-10-22 2:17 ` Dan Williams
2024-10-22 13:54 ` Terry Bowman
2024-10-08 22:16 ` [PATCH 03/15] cxl/aer/pci: Refactor AER driver's existing interfaces to support CXL PCIe ports Terry Bowman
2024-10-10 19:11 ` Bjorn Helgaas
2024-10-14 17:27 ` Terry Bowman
2024-10-08 22:16 ` Terry Bowman [this message]
2024-10-16 16:22 ` [PATCH 04/15] cxl/aer/pci: Add CXL PCIe port correctable error support in AER service driver Jonathan Cameron
2024-10-16 17:18 ` Terry Bowman
2024-10-16 17:29 ` Jonathan Cameron
2024-10-08 22:16 ` [PATCH 05/15] cxl/aer/pci: Update AER driver to read UCE fatal status for all CXL PCIe port devices Terry Bowman
2024-10-16 16:28 ` Jonathan Cameron
2024-10-08 22:16 ` [PATCH 06/15] cxl/aer/pci: Introduce PCI_ERS_RESULT_PANIC to pci_ers_result type Terry Bowman
2024-10-16 16:30 ` Jonathan Cameron
2024-10-16 17:31 ` Terry Bowman
2024-10-17 13:31 ` Jonathan Cameron
2024-10-17 14:50 ` Bowman, Terry
2024-10-08 22:16 ` [PATCH 07/15] cxl/aer/pci: Add CXL PCIe port uncorrectable error recovery in AER service driver Terry Bowman
2024-10-16 16:54 ` Jonathan Cameron
2024-10-16 18:07 ` Terry Bowman
2024-10-17 13:43 ` Jonathan Cameron
2024-10-17 16:21 ` Bowman, Terry
2024-10-17 17:08 ` Jonathan Cameron
2024-10-08 22:16 ` [PATCH 08/15] cxl/pci: Change find_cxl_ports() to be non-static Terry Bowman
2024-10-08 22:16 ` [PATCH 09/15] cxl/pci: Map CXL PCIe downstream port RAS registers Terry Bowman
2024-10-16 17:14 ` Jonathan Cameron
2024-10-16 18:16 ` Terry Bowman
2024-10-17 13:50 ` Jonathan Cameron
2024-10-17 16:26 ` Bowman, Terry
2024-10-08 22:16 ` [PATCH 10/15] cxl/pci: Map CXL PCIe upstream " Terry Bowman
2024-10-08 22:16 ` [PATCH 11/15] cxl/pci: Update RAS handler interfaces to support CXL PCIe ports Terry Bowman
2024-10-08 22:16 ` [PATCH 12/15] cxl/pci: Add error handler for CXL PCIe port RAS errors Terry Bowman
2024-10-17 13:57 ` Jonathan Cameron
2024-10-17 16:42 ` Bowman, Terry
2024-10-08 22:16 ` [PATCH 13/15] cxl/pci: Add trace logging " Terry Bowman
2024-10-17 14:04 ` Jonathan Cameron
2024-10-08 22:16 ` [PATCH 14/15] cxl/aer/pci: Export pci_aer_unmask_internal_errors() Terry Bowman
2024-10-16 17:22 ` Jonathan Cameron
2024-10-08 22:16 ` [PATCH 15/15] cxl/pci: Enable internal CE/UCE interrupts for CXL PCIe port devices Terry Bowman
2024-10-16 17:21 ` Jonathan Cameron
2024-10-16 17:24 ` Terry Bowman
2024-10-10 19:07 ` [PATCH 0/15] Enable CXL PCIe port protocol error handling and logging Bjorn Helgaas
2024-10-14 17:22 ` Terry Bowman
2024-10-14 17:29 ` Bjorn Helgaas
2024-10-14 17:33 ` Terry Bowman
2024-10-17 16:34 ` Fan Ni
2024-10-17 17:27 ` Bowman, Terry
2024-10-21 22:19 ` Fan Ni
2024-10-18 23:22 ` Bjorn Helgaas
2024-10-21 19:22 ` Terry Bowman
2024-10-22 1:43 ` Dan Williams
2024-10-22 13:29 ` Terry Bowman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241008221657.1130181-5-terry.bowman@amd.com \
--to=terry.bowman@amd.com \
--cc=Benjamin.Cheatham@amd.com \
--cc=alison.schofield@intel.com \
--cc=bhelgaas@google.com \
--cc=dan.j.williams@intel.com \
--cc=dave.jiang@intel.com \
--cc=dave@stgolabs.net \
--cc=jonathan.cameron@huawei.com \
--cc=linux-cxl@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-pci@vger.kernel.org \
--cc=mahesh@linux.ibm.com \
--cc=ming4.li@intel.com \
--cc=nathan.fontenot@amd.com \
--cc=oohall@gmail.com \
--cc=rrichter@amd.com \
--cc=smita.koralahallichannabasappa@amd.com \
--cc=vishal.l.verma@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox