From: "Wang, Qingshun" <qingshun.wang@linux.intel.com>
To: linux-pci@vger.kernel.org
Cc: chao.p.peng@linux.intel.com, chao.p.peng@intel.com,
erwin.tsaur@intel.com, feiting.wanyan@intel.com,
qingshun.wang@intel.com, "Wang,
Qingshun" <qingshun.wang@linux.intel.com>
Subject: [PATCH 2/4] pci/aer: Handle Advisory Non-Fatal properly
Date: Thu, 11 Jan 2024 15:32:17 +0800 [thread overview]
Message-ID: <20240111073227.31488-3-qingshun.wang@linux.intel.com> (raw)
In-Reply-To: <20240111073227.31488-1-qingshun.wang@linux.intel.com>
If we are processing an Advisory Non-Fatal Error, first check the Device
Status. If either the Fatal or the Non-Fatal Error Detected bit is set,
leave it to the uncorrectable error handler, which should be executed
right after the CE handler in this case, to clear the UE status bit.
Otherwise, filter out the uncorrectable errors that cannot trigger an
Advisory Non-Fatal Error, then clear all the remaining status bits.
Reviewed-by: "Tsaur, Erwin" <erwin.tsaur@intel.com>
Signed-off-by: "Wang, Qingshun" <qingshun.wang@linux.intel.com>
---
drivers/pci/pcie/aer.c | 58 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 57 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index 9311323a2391..86e7cfd71f23 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -107,6 +107,12 @@ struct aer_stats {
PCI_ERR_ROOT_MULTI_COR_RCV | \
PCI_ERR_ROOT_MULTI_UNCOR_RCV)
+#define AER_ERR_ANFE_UNC_MASK (PCI_ERR_UNC_POISON_TLP | \
+ PCI_ERR_UNC_COMP_TIME | \
+ PCI_ERR_UNC_COMP_ABORT | \
+ PCI_ERR_UNC_UNX_COMP | \
+ PCI_ERR_UNC_UNSUP) /* UNCOR status bits that may be reported as ANFE */
+
static int pcie_aer_disable;
static pci_ers_result_t aer_root_reset(struct pci_dev *dev);
@@ -612,6 +618,29 @@ const struct attribute_group aer_stats_attr_group = {
.is_visible = aer_stats_attrs_are_visible,
};
+static int anfe_get_related_err(struct aer_err_info *info)
+{
+ /*
+ * Take the most conservative route here: if Non-Fatal or Fatal
+ * Error Detected is set in device_status, return 0 rather than
+ * assume that any bit in uncor_status was set by ANFE.
+ */
+ if (info->device_status & (PCI_EXP_DEVSTA_NFED | PCI_EXP_DEVSTA_FED))
+ return 0;
+ /*
+ * An UNCOR error may cause an Advisory Non-Fatal error if:
+ * a. The severity of the error is Non-Fatal.
+ * b. The error is one of: Poisoned TLP, Completion Timeout,
+ * Completer Abort, Unexpected Completion, Unsupported Request.
+ * The unmasked uncor_status bits passing this filter are returned.
+ * NOTE(review): (a) is applied as ~info->severity, yet severity is
+ * compared against AER_CORRECTABLE elsewhere in this patch; confirm
+ * that it is really a per-bit mask of uncorrectable severities.
+ */
+ return info->uncor_status & ~info->uncor_mask
+ & AER_ERR_ANFE_UNC_MASK & ~info->severity;
+}
+
static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
struct aer_err_info *info)
{
@@ -678,6 +707,7 @@ static void __aer_print_error(struct pci_dev *dev,
struct aer_err_info *info)
{
unsigned long status;
+ unsigned long anfe_status;
const char **strings;
const char *level, *errmsg;
int i;
@@ -700,6 +730,21 @@ static void __aer_print_error(struct pci_dev *dev,
pci_printk(level, dev, " [%2d] %-22s%s\n", i, errmsg,
info->first_error == i ? " (First)" : "");
}
+
+ if (info->severity == AER_CORRECTABLE && (status & PCI_ERR_COR_ADV_NFAT)) {
+ anfe_status = anfe_get_related_err(info);
+ if (anfe_status) {
+ pci_printk(level, dev, "Uncorrectable errors that may cause Advisory Non-Fatal:");
+ for_each_set_bit(i, &anfe_status, 32) {
+ errmsg = aer_uncorrectable_error_string[i];
+ if (!errmsg)
+ errmsg = "Unknown Error Bit";
+
+ pci_printk(level, dev, " [%2d] %-22s\n", i, errmsg);
+ }
+ }
+ }
+
pci_dev_aer_stats_incr(dev, info);
}
@@ -1092,6 +1137,14 @@ static inline void cxl_rch_handle_error(struct pci_dev *dev,
struct aer_err_info *info) { }
#endif
+static void handle_advisory_nonfatal(struct pci_dev *dev, struct aer_err_info *info)
+{
+ int aer = dev->aer_cap; /* base used for the PCI_ERR_UNCOR_STATUS access below */
+
+ pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS,
+ anfe_get_related_err(info)); /* write back only the bits the helper attributes to ANFE */
+}
+
/**
* pci_aer_handle_error - handle logging error into an event log
* @dev: pointer to pci_dev data structure of error source device
@@ -1108,9 +1161,12 @@ static void pci_aer_handle_error(struct pci_dev *dev, struct aer_err_info *info)
* Correctable error does not need software intervention.
* No need to go through error recovery process.
*/
- if (aer)
+ if (aer) {
pci_write_config_dword(dev, aer + PCI_ERR_COR_STATUS,
info->cor_status);
+ if (info->cor_status & PCI_ERR_COR_ADV_NFAT)
+ handle_advisory_nonfatal(dev, info);
+ }
if (pcie_aer_is_native(dev)) {
struct pci_driver *pdrv = dev->driver;
--
2.42.0
next prev parent reply other threads:[~2024-01-11 7:33 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-11 7:32 [PATCH 0/4] pci/aer: Handle Advisory Non-Fatal properly Wang, Qingshun
2024-01-11 7:32 ` [PATCH 1/4] pci/aer: Store more information in aer_err_info Wang, Qingshun
2024-01-11 11:27 ` Ilpo Järvinen
2024-01-12 3:21 ` Wang, Qingshun
2024-01-12 16:32 ` Bjorn Helgaas
2024-01-16 8:35 ` Wang, Qingshun
2024-01-11 7:32 ` Wang, Qingshun [this message]
2024-01-12 16:35 ` [PATCH 2/4] pci/aer: Handle Advisory Non-Fatal properly Bjorn Helgaas
2024-01-16 8:42 ` Wang, Qingshun
2024-01-11 7:32 ` [PATCH 3/4] pci/aer: Fetch information for FTrace Wang, Qingshun
2024-01-11 7:32 ` [PATCH 4/4] ras: Trace more information in aer_event Wang, Qingshun
2024-01-12 3:23 ` [PATCH 0/4] pci/aer: Handle Advisory Non-Fatal properly Wang, Qingshun
2024-01-12 16:41 ` Bjorn Helgaas
2024-01-16 8:32 ` Wang, Qingshun
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240111073227.31488-3-qingshun.wang@linux.intel.com \
--to=qingshun.wang@linux.intel.com \
--cc=chao.p.peng@intel.com \
--cc=chao.p.peng@linux.intel.com \
--cc=erwin.tsaur@intel.com \
--cc=feiting.wanyan@intel.com \
--cc=linux-pci@vger.kernel.org \
--cc=qingshun.wang@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox