All of lore.kernel.org
 help / color / mirror / Atom feed
From: Shiju Jose <shiju.jose@huawei.com>
To: Terry Bowman <terry.bowman@amd.com>,
	"dave@stgolabs.net" <dave@stgolabs.net>,
	Jonathan Cameron <jonathan.cameron@huawei.com>,
	"dave.jiang@intel.com" <dave.jiang@intel.com>,
	"alison.schofield@intel.com" <alison.schofield@intel.com>,
	"dan.j.williams@intel.com" <dan.j.williams@intel.com>,
	"bhelgaas@google.com" <bhelgaas@google.com>,
	"ming.li@zohomail.com" <ming.li@zohomail.com>,
	"Smita.KoralahalliChannabasappa@amd.com"
	<Smita.KoralahalliChannabasappa@amd.com>,
	"rrichter@amd.com" <rrichter@amd.com>,
	"dan.carpenter@linaro.org" <dan.carpenter@linaro.org>,
	"PradeepVineshReddy.Kodamati@amd.com"
	<PradeepVineshReddy.Kodamati@amd.com>,
	"lukas@wunner.de" <lukas@wunner.de>,
	"Benjamin.Cheatham@amd.com" <Benjamin.Cheatham@amd.com>,
	"sathyanarayanan.kuppuswamy@linux.intel.com"
	<sathyanarayanan.kuppuswamy@linux.intel.com>,
	"linux-cxl@vger.kernel.org" <linux-cxl@vger.kernel.org>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-pci@vger.kernel.org" <linux-pci@vger.kernel.org>
Subject: RE: [PATCH v10 03/17] PCI/AER: Report CXL or PCIe bus error type in trace logging
Date: Fri, 27 Jun 2025 11:32:01 +0000	[thread overview]
Message-ID: <b745b33288e540cea2d67286c9e49f8b@huawei.com> (raw)
In-Reply-To: <20250626224252.1415009-4-terry.bowman@amd.com>

>-----Original Message-----
>From: Terry Bowman <terry.bowman@amd.com>
>Sent: 26 June 2025 23:43
>To: dave@stgolabs.net; Jonathan Cameron <jonathan.cameron@huawei.com>;
>dave.jiang@intel.com; alison.schofield@intel.com; dan.j.williams@intel.com;
>bhelgaas@google.com; Shiju Jose <shiju.jose@huawei.com>;
>ming.li@zohomail.com; Smita.KoralahalliChannabasappa@amd.com;
>rrichter@amd.com; dan.carpenter@linaro.org;
>PradeepVineshReddy.Kodamati@amd.com; lukas@wunner.de;
>Benjamin.Cheatham@amd.com;
>sathyanarayanan.kuppuswamy@linux.intel.com; terry.bowman@amd.com;
>linux-cxl@vger.kernel.org
>Cc: linux-kernel@vger.kernel.org; linux-pci@vger.kernel.org
>Subject: [PATCH v10 03/17] PCI/AER: Report CXL or PCIe bus error type in trace
>logging
>
>The AER service driver and aer_event tracing currently log 'PCIe Bus Type'
>for all errors. Update the driver and aer_event tracing to log 'CXL Bus Type' for
>CXL device errors.
>
>This requires that the AER driver be able to identify and distinguish between
>PCIe errors and CXL errors.
>
>Introduce boolean 'is_cxl' to 'struct aer_err_info'. Add assignment in
>aer_get_device_error_info() and pci_print_aer().
>
>Update the aer_event trace routine to accept a bus type string parameter.
>
>Signed-off-by: Terry Bowman <terry.bowman@amd.com>
>Reviewed-by: Ira Weiny <ira.weiny@intel.com>
>---
> drivers/pci/pci.h       |  6 ++++++
> drivers/pci/pcie/aer.c  | 21 +++++++++++++++------
> include/ras/ras_event.h |  9 ++++++---
> 3 files changed, 27 insertions(+), 9 deletions(-)
>
>diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index
>12215ee72afb..a0d1e59b5666 100644
>--- a/drivers/pci/pci.h
>+++ b/drivers/pci/pci.h
>@@ -608,6 +608,7 @@ struct aer_err_info {
> 	int ratelimit_print[AER_MAX_MULTI_ERR_DEVICES];
> 	int error_dev_num;
> 	const char *level;		/* printk level */
>+	bool is_cxl;
>
> 	unsigned int id:16;
>
>@@ -628,6 +629,11 @@ struct aer_err_info {  int
>aer_get_device_error_info(struct aer_err_info *info, int i);  void
>aer_print_error(struct aer_err_info *info, int i);
>
>+static inline const char *aer_err_bus(struct aer_err_info *info) {
>+	return info->is_cxl ? "CXL" : "PCIe";
>+}
>+
> int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
> 		      unsigned int tlp_len, bool flit,
> 		      struct pcie_tlp_log *log);
>diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index
>70ac66188367..a2df9456595a 100644
>--- a/drivers/pci/pcie/aer.c
>+++ b/drivers/pci/pcie/aer.c
>@@ -837,6 +837,7 @@ void aer_print_error(struct aer_err_info *info, int i)
> 	struct pci_dev *dev;
> 	int layer, agent, id;
> 	const char *level = info->level;
>+	const char *bus_type = aer_err_bus(info);
>
> 	if (WARN_ON_ONCE(i >= AER_MAX_MULTI_ERR_DEVICES))
> 		return;
>@@ -845,23 +846,23 @@ void aer_print_error(struct aer_err_info *info, int i)
> 	id = pci_dev_id(dev);
>
> 	pci_dev_aer_stats_incr(dev, info);
>-	trace_aer_event(pci_name(dev), (info->status & ~info->mask),
>+	trace_aer_event(pci_name(dev), bus_type, (info->status & ~info->mask),
> 			info->severity, info->tlp_header_valid, &info->tlp);
>
> 	if (!info->ratelimit_print[i])
> 		return;
>
> 	if (!info->status) {
>-		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible,
>(Unregistered Agent ID)\n",
>-			aer_error_severity_string[info->severity]);
>+		pci_err(dev, "%s Bus Error: severity=%s, type=Inaccessible,
>(Unregistered Agent ID)\n",
>+			bus_type, aer_error_severity_string[info->severity]);
> 		goto out;
> 	}
>
> 	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
> 	agent = AER_GET_AGENT(info->severity, info->status);
>
>-	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
>-		   aer_error_severity_string[info->severity],
>+	aer_printk(level, dev, "%s Bus Error: severity=%s, type=%s, (%s)\n",
>+		   bus_type, aer_error_severity_string[info->severity],
> 		   aer_error_layer[layer], aer_agent_string[agent]);
>
> 	aer_printk(level, dev, "  device [%04x:%04x] error
>status/mask=%08x/%08x\n", @@ -895,6 +896,7 @@
>EXPORT_SYMBOL_GPL(cper_severity_to_aer);
> void pci_print_aer(struct pci_dev *dev, int aer_severity,
> 		   struct aer_capability_regs *aer)
> {
>+	const char *bus_type;
> 	int layer, agent, tlp_header_valid = 0;
> 	u32 status, mask;
> 	struct aer_err_info info = {
>@@ -915,9 +917,12 @@ void pci_print_aer(struct pci_dev *dev, int
>aer_severity,
>
> 	info.status = status;
> 	info.mask = mask;
>+	info.is_cxl = pcie_is_cxl(dev);
>+
>+	bus_type = aer_err_bus(&info);
>
> 	pci_dev_aer_stats_incr(dev, &info);
>-	trace_aer_event(pci_name(dev), (status & ~mask),
>+	trace_aer_event(pci_name(dev), bus_type, (status & ~mask),
> 			aer_severity, tlp_header_valid, &aer->header_log);
>
> 	if (!aer_ratelimit(dev, info.severity)) @@ -939,6 +944,9 @@ void
>pci_print_aer(struct pci_dev *dev, int aer_severity,
> 	if (tlp_header_valid)
> 		pcie_print_tlp_log(dev, &aer->header_log, info.level,
> 				   dev_fmt("  "));
>+
>+	trace_aer_event(dev_name(&dev->dev), bus_type, (status & ~mask),
>+			aer_severity, tlp_header_valid, &aer->header_log);
Hi Terry,

It looks like an extra trace_aer_event() call is added here, in addition to the
trace_aer_event(pci_name(dev), ...) call earlier in pci_print_aer(). Is that intentional?

Thanks,
Shiju
> }
> EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
>
>@@ -1371,6 +1379,7 @@ int aer_get_device_error_info(struct aer_err_info
>*info, int i)
> 	/* Must reset in this function */
> 	info->status = 0;
> 	info->tlp_header_valid = 0;
>+	info->is_cxl = pcie_is_cxl(dev);
>
> 	/* The device might not support AER */
> 	if (!aer)
>diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h index
>14c9f943d53f..080829d59c36 100644
>--- a/include/ras/ras_event.h
>+++ b/include/ras/ras_event.h
>@@ -297,15 +297,17 @@ TRACE_EVENT(non_standard_event,
>
> TRACE_EVENT(aer_event,
> 	TP_PROTO(const char *dev_name,
>+		 const char *bus_type,
> 		 const u32 status,
> 		 const u8 severity,
> 		 const u8 tlp_header_valid,
> 		 struct pcie_tlp_log *tlp),
>
>-	TP_ARGS(dev_name, status, severity, tlp_header_valid, tlp),
>+	TP_ARGS(dev_name, bus_type, status, severity, tlp_header_valid, tlp),
>
> 	TP_STRUCT__entry(
> 		__string(	dev_name,	dev_name	)
>+		__string(	bus_type,	bus_type	)
> 		__field(	u32,		status		)
> 		__field(	u8,		severity	)
> 		__field(	u8, 		tlp_header_valid)
>@@ -314,6 +316,7 @@ TRACE_EVENT(aer_event,
>
> 	TP_fast_assign(
> 		__assign_str(dev_name);
>+		__assign_str(bus_type);
> 		__entry->status		= status;
> 		__entry->severity	= severity;
> 		__entry->tlp_header_valid = tlp_header_valid; @@ -325,8
>+328,8 @@ TRACE_EVENT(aer_event,
> 		}
> 	),
>
>-	TP_printk("%s PCIe Bus Error: severity=%s, %s, TLP Header=%s\n",
>-		__get_str(dev_name),
>+	TP_printk("%s %s Bus Error: severity=%s, %s, TLP Header=%s\n",
>+		__get_str(dev_name), __get_str(bus_type),
> 		__entry->severity == AER_CORRECTABLE ? "Corrected" :
> 			__entry->severity == AER_FATAL ?
> 			"Fatal" : "Uncorrected, non-fatal",
>--
>2.34.1


  parent reply	other threads:[~2025-06-27 11:32 UTC|newest]

Thread overview: 87+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-26 22:42 [PATCH v10 00/17] Enable CXL PCIe Port Protocol Error handling and logging Terry Bowman
2025-06-26 22:42 ` [PATCH v10 01/17] cxl/pci: Remove unnecessary CXL Endpoint handling helper functions Terry Bowman
2025-07-18 17:55   ` Dave Jiang
2025-07-23 21:58   ` dan.j.williams
2025-07-23 22:15     ` Dave Jiang
2025-06-26 22:42 ` [PATCH v10 02/17] PCI/CXL: Add pcie_is_cxl() Terry Bowman
2025-07-23 22:30   ` dan.j.williams
2025-07-23 23:21     ` Bowman, Terry
2025-07-24 18:00       ` dan.j.williams
2025-08-09 10:56   ` Alejandro Lucero Palau
2025-08-11 19:14     ` Bowman, Terry
2025-08-11 23:14       ` dan.j.williams
2025-06-26 22:42 ` [PATCH v10 03/17] PCI/AER: Report CXL or PCIe bus error type in trace logging Terry Bowman
2025-06-26 23:25   ` Sathyanarayanan Kuppuswamy
2025-06-27 14:14     ` Bowman, Terry
2025-06-27  9:53   ` Jonathan Cameron
2025-07-02 16:00     ` Bowman, Terry
2025-06-27 11:32   ` Shiju Jose [this message]
2025-06-27 14:24     ` Bowman, Terry
2025-07-01 21:27   ` Dave Jiang
2025-07-23 22:56   ` dan.j.williams
2025-06-26 22:42 ` [PATCH v10 04/17] CXL/AER: Introduce CXL specific AER driver file Terry Bowman
2025-06-26 23:42   ` Sathyanarayanan Kuppuswamy
2025-06-27 10:12     ` Jonathan Cameron
2025-06-27 14:29     ` Bowman, Terry
2025-07-24  0:01   ` dan.j.williams
2025-07-24 17:06     ` Bowman, Terry
2025-07-24 20:32       ` dan.j.williams
2025-07-24  1:16   ` dan.j.williams
2025-07-24 17:02     ` Bowman, Terry
2025-07-24 20:23       ` dan.j.williams
2025-06-26 22:42 ` [PATCH v10 05/17] CXL/AER: Introduce kfifo for forwarding CXL errors Terry Bowman
2025-06-27 10:24   ` Jonathan Cameron
2025-07-02 16:21     ` Bowman, Terry
2025-07-02 19:54       ` Dan Carpenter
2025-07-02 19:57         ` Bowman, Terry
2025-07-03 10:06       ` Jonathan Cameron
2025-07-01 21:53   ` Dave Jiang
2025-07-02 17:10     ` Bowman, Terry
2025-07-24  2:01   ` dan.j.williams
2025-07-24 17:21     ` Bowman, Terry
2025-07-24 20:55       ` dan.j.williams
2025-06-26 22:42 ` [PATCH v10 06/17] PCI/AER: Dequeue forwarded CXL error Terry Bowman
2025-06-27 11:00   ` Jonathan Cameron
2025-07-02 17:51     ` Bowman, Terry
2025-07-01 23:04   ` Dave Jiang
2025-07-02 17:56     ` Bowman, Terry
2025-07-03 10:11       ` Jonathan Cameron
2025-07-25  0:38   ` dan.j.williams
2025-06-26 22:42 ` [PATCH v10 07/17] CXL/PCI: Introduce CXL uncorrectable protocol error recovery Terry Bowman
2025-06-27 11:05   ` Jonathan Cameron
2025-07-02 21:06     ` Bowman, Terry
2025-06-27 12:27   ` Shiju Jose
2025-07-02 21:34     ` Bowman, Terry
2025-06-26 22:42 ` [PATCH v10 08/17] cxl/pci: Move RAS initialization to cxl_port driver Terry Bowman
2025-06-27 11:12   ` Jonathan Cameron
2025-07-18 18:01   ` Dave Jiang
2025-06-26 22:42 ` [PATCH v10 09/17] cxl/pci: Map CXL Endpoint Port and CXL Switch Port RAS registers Terry Bowman
2025-06-27 11:17   ` Jonathan Cameron
2025-07-02 21:41     ` Bowman, Terry
2025-07-18 21:28   ` Dave Jiang
2025-07-18 21:55     ` Bowman, Terry
2025-07-18 22:01       ` Dave Jiang
2025-07-18 22:40         ` Bowman, Terry
2025-07-18 22:45           ` Dave Jiang
2025-06-26 22:42 ` [PATCH v10 10/17] cxl/pci: Update RAS handler interfaces to also support CXL Ports Terry Bowman
2025-06-26 22:42 ` [PATCH v10 11/17] cxl/pci: Log message if RAS registers are unmapped Terry Bowman
2025-07-21 21:56   ` Dave Jiang
2025-06-26 22:42 ` [PATCH v10 12/17] cxl/pci: Unify CXL trace logging for CXL Endpoints and CXL Ports Terry Bowman
2025-06-27 12:22   ` Shiju Jose
2025-07-02  1:18     ` Alison Schofield
2025-07-02 22:07       ` Bowman, Terry
2025-07-02 21:56     ` Bowman, Terry
2025-06-26 22:42 ` [PATCH v10 13/17] cxl/pci: Update cxl_handle_cor_ras() to return early if no RAS errors Terry Bowman
2025-06-27 11:48   ` Jonathan Cameron
2025-07-21 22:17   ` Dave Jiang
2025-06-26 22:42 ` [PATCH v10 14/17] cxl/pci: Introduce CXL Endpoint protocol error handlers Terry Bowman
2025-06-27 11:52   ` Jonathan Cameron
2025-06-27 12:27   ` Shiju Jose
2025-07-21 22:35   ` Dave Jiang
2025-07-22 18:23     ` Bowman, Terry
2025-06-26 22:42 ` [PATCH v10 15/17] CXL/PCI: Introduce CXL Port " Terry Bowman
2025-06-26 22:42 ` [PATCH v10 16/17] CXL/PCI: Enable CXL protocol errors during CXL Port probe Terry Bowman
2025-06-26 22:42 ` [PATCH v10 17/17] CXL/PCI: Disable CXL protocol error interrupts during CXL Port cleanup Terry Bowman
2025-07-23 21:55 ` [PATCH v10 00/17] Enable CXL PCIe Port Protocol Error handling and logging dan.j.williams
2025-07-24 15:58   ` Bowman, Terry
2025-08-18 15:18 ` Joshua Hahn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b745b33288e540cea2d67286c9e49f8b@huawei.com \
    --to=shiju.jose@huawei.com \
    --cc=Benjamin.Cheatham@amd.com \
    --cc=PradeepVineshReddy.Kodamati@amd.com \
    --cc=Smita.KoralahalliChannabasappa@amd.com \
    --cc=alison.schofield@intel.com \
    --cc=bhelgaas@google.com \
    --cc=dan.carpenter@linaro.org \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=jonathan.cameron@huawei.com \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=lukas@wunner.de \
    --cc=ming.li@zohomail.com \
    --cc=rrichter@amd.com \
    --cc=sathyanarayanan.kuppuswamy@linux.intel.com \
    --cc=terry.bowman@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.