All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@linux.intel.com>
To: "Fabio M. De Francesco" <fabio.m.de.francesco@linux.intel.com>,
	"Rafael J . Wysocki" <rafael@kernel.org>,
	Len Brown <lenb@kernel.org>, Davidlohr Bueso <dave@stgolabs.net>,
	Jonathan Cameron <jonathan.cameron@huawei.com>,
	Dave Jiang <dave.jiang@intel.com>,
	Alison Schofield <alison.schofield@intel.com>,
	Vishal Verma <vishal.l.verma@intel.com>,
	Ira Weiny <ira.weiny@intel.com>,
	Dan Williams <dan.j.williams@intel.com>,
	Mahesh J Salgaonkar <mahesh@linux.ibm.com>,
	Oliver O'Halloran <oohall@gmail.com>,
	Bjorn Helgaas <bhelgaas@google.com>,
	Tony Luck <tony.luck@intel.com>, Borislav Petkov <bp@alien8.de>,
	linux-kernel@vger.kernel.org, linux-acpi@vger.kernel.org,
	linux-cxl@vger.kernel.org, linuxppc-dev@lists.ozlabs.org,
	linux-pci@vger.kernel.org, linux-edac@vger.kernel.org
Cc: Yazen Ghannam <yazen.ghannam@amd.com>
Subject: Re: [PATCH 3/4 v3] ACPI: extlog: Trace CPER PCI Express Error Section
Date: Tue, 3 Jun 2025 15:18:42 -0700	[thread overview]
Message-ID: <b5c6a148-801e-46d5-bc8a-a550006dbd52@linux.intel.com> (raw)
In-Reply-To: <20250603155536.577493-4-fabio.m.de.francesco@linux.intel.com>


On 6/3/25 8:54 AM, Fabio M. De Francesco wrote:
> I/O Machine Check Architecture events may signal failing PCIe components
> or links. The AER event contains details on what was happening on the wire
> when the error was signaled.
>
> Trace the CPER PCIe Error section (UEFI v2.10, Appendix N.2.7) reported
> by the I/O MCA.
>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com>
> ---
>   drivers/acpi/Kconfig       |  1 +
>   drivers/acpi/acpi_extlog.c | 32 ++++++++++++++++++++++++++++++++
>   drivers/pci/pcie/aer.c     |  2 +-
>   include/linux/aer.h        |  8 ++++++--
>   4 files changed, 40 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
> index 7bc40c2735ac0..2bbd9e4868ad7 100644
> --- a/drivers/acpi/Kconfig
> +++ b/drivers/acpi/Kconfig
> @@ -493,6 +493,7 @@ config ACPI_EXTLOG
>   	tristate "Extended Error Log support"
>   	depends on X86_MCE && X86_LOCAL_APIC && EDAC
>   	select UEFI_CPER
> +	select ACPI_APEI_PCIEAER
>   	help
>   	  Certain usages such as Predictive Failure Analysis (PFA) require
>   	  more information about the error than what can be described in
> diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c
> index 47d11cb5c9120..b2928ff297eda 100644
> --- a/drivers/acpi/acpi_extlog.c
> +++ b/drivers/acpi/acpi_extlog.c
> @@ -132,6 +132,34 @@ static int print_extlog_rcd(const char *pfx,
>   	return 1;
>   }
>   
> +static void extlog_print_pcie(struct cper_sec_pcie *pcie_err,
> +			      int severity)
> +{
> +	struct aer_capability_regs *aer;
> +	struct pci_dev *pdev;
> +	unsigned int devfn;
> +	unsigned int bus;
> +	int aer_severity;
> +	int domain;
> +
> +	if (!(pcie_err->validation_bits & CPER_PCIE_VALID_DEVICE_ID ||
> +	      pcie_err->validation_bits & CPER_PCIE_VALID_AER_INFO))
> +		return;
> +
> +	aer_severity = cper_severity_to_aer(severity);
> +	aer = (struct aer_capability_regs *)pcie_err->aer_info;
> +	domain = pcie_err->device_id.segment;
> +	bus = pcie_err->device_id.bus;
> +	devfn = PCI_DEVFN(pcie_err->device_id.device,
> +			  pcie_err->device_id.function);
> +	pdev = pci_get_domain_bus_and_slot(domain, bus, devfn);
> +	if (!pdev)
> +		return;
> +
> +	pci_print_aer(KERN_DEBUG, pdev, aer_severity, aer);
> +	pci_dev_put(pdev);
> +}
> +
>   static int extlog_print(struct notifier_block *nb, unsigned long val,
>   			void *data)
>   {
> @@ -183,6 +211,10 @@ static int extlog_print(struct notifier_block *nb, unsigned long val,
>   			if (gdata->error_data_length >= sizeof(*mem))
>   				trace_extlog_mem_event(mem, err_seq, fru_id, fru_text,
>   						       (u8)gdata->error_severity);
> +		} else if (guid_equal(sec_type, &CPER_SEC_PCIE)) {
> +			struct cper_sec_pcie *pcie_err = acpi_hest_get_payload(gdata);
> +
> +			extlog_print_pcie(pcie_err, gdata->error_severity);
>   		} else {
>   			void *err = acpi_hest_get_payload(gdata);
>   
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index d0ebf7c15afa9..627fcf4346983 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -801,7 +801,7 @@ void pci_print_aer(char *level, struct pci_dev *dev, int aer_severity,
>   	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
>   			aer_severity, tlp_header_valid, &aer->header_log);
>   }
> -EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
> +EXPORT_SYMBOL_GPL(pci_print_aer);
>   
>   /**
>    * add_error_device - list device to be handled
> diff --git a/include/linux/aer.h b/include/linux/aer.h
> index 45d0fb2e2e759..6ce433cee4625 100644
> --- a/include/linux/aer.h
> +++ b/include/linux/aer.h
> @@ -56,16 +56,20 @@ struct aer_capability_regs {
>   #if defined(CONFIG_PCIEAER)
>   int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
>   int pcie_aer_is_native(struct pci_dev *dev);
> +void pci_print_aer(char *level, struct pci_dev *dev, int aer_severity,
> +		   struct aer_capability_regs *aer);
>   #else
>   static inline int pci_aer_clear_nonfatal_status(struct pci_dev *dev)
>   {
>   	return -EINVAL;
>   }
>   static inline int pcie_aer_is_native(struct pci_dev *dev) { return 0; }
> +static inline void pci_print_aer(char *level, struct pci_dev *dev,
> +				 int aer_severity,
> +				 struct aer_capability_regs *aer)
> +{ }

This also needs rebase. With the patches queued for v6.16, you may not
need to pass level any more.

>   #endif
>   
> -void pci_print_aer(char *level, struct pci_dev *dev, int aer_severity,
> -		   struct aer_capability_regs *aer);
>   int cper_severity_to_aer(int cper_severity);
>   void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn,
>   		       int severity, struct aer_capability_regs *aer_regs);

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer


  parent reply	other threads:[~2025-06-03 22:18 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-06-03 15:54 [PATCH 0/4 v3] Make ELOG and GHES log and trace consistently Fabio M. De Francesco
2025-06-03 15:54 ` [PATCH 1/4 v3] ACPI: extlog: Trace CPER Non-standard Section Body Fabio M. De Francesco
2025-06-03 21:14   ` Dave Jiang
2025-06-04  5:49   ` Zhuo, Qiuxu
2025-06-03 15:54 ` [PATCH 2/4 v3] PCI/AER: Modify pci_print_aer() to take log level Fabio M. De Francesco
2025-06-03 20:51   ` Sathyanarayanan Kuppuswamy
2025-06-03 21:23   ` Dave Jiang
2025-06-03 15:54 ` [PATCH 3/4 v3] ACPI: extlog: Trace CPER PCI Express Error Section Fabio M. De Francesco
2025-06-03 21:33   ` Dave Jiang
2025-06-03 21:49   ` Dave Jiang
2025-06-03 22:18   ` Sathyanarayanan Kuppuswamy [this message]
2025-06-04 14:25   ` Zhuo, Qiuxu
2025-06-03 15:54 ` [PATCH 4/4 v3] ACPI: extlog: Trace CPER CXL Protocol " Fabio M. De Francesco
2025-06-03 21:44   ` Dave Jiang
2025-06-03 22:11   ` Sathyanarayanan Kuppuswamy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=b5c6a148-801e-46d5-bc8a-a550006dbd52@linux.intel.com \
    --to=sathyanarayanan.kuppuswamy@linux.intel.com \
    --cc=alison.schofield@intel.com \
    --cc=bhelgaas@google.com \
    --cc=bp@alien8.de \
    --cc=dan.j.williams@intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=fabio.m.de.francesco@linux.intel.com \
    --cc=ira.weiny@intel.com \
    --cc=jonathan.cameron@huawei.com \
    --cc=lenb@kernel.org \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=linuxppc-dev@lists.ozlabs.org \
    --cc=mahesh@linux.ibm.com \
    --cc=oohall@gmail.com \
    --cc=rafael@kernel.org \
    --cc=tony.luck@intel.com \
    --cc=vishal.l.verma@intel.com \
    --cc=yazen.ghannam@amd.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.