All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@linux.intel.com>
To: Jon Pan-Doh <pandoh@google.com>,
	Bjorn Helgaas <bhelgaas@google.com>,
	Karolina Stolarek <karolina.stolarek@oracle.com>
Cc: linux-pci@vger.kernel.org,
	"Martin Petersen" <martin.petersen@oracle.com>,
	"Ben Fuller" <ben.fuller@oracle.com>,
	"Drew Walton" <drewwalton@microsoft.com>,
	"Anil Agrawal" <anilagrawal@meta.com>,
	"Tony Luck" <tony.luck@intel.com>,
	"Ilpo Järvinen" <ilpo.jarvinen@linux.intel.com>,
	"Lukas Wunner" <lukas@wunner.de>,
	"Jonathan Cameron" <Jonathan.Cameron@huawei.com>,
	"Terry Bowman" <Terry.bowman@amd.com>
Subject: Re: [PATCH v3 1/8] PCI/AER: Check log level once and propagate down
Date: Wed, 19 Mar 2025 17:07:30 -0700	[thread overview]
Message-ID: <256a57a6-e2a8-414c-bf48-34b1cdd7ddc8@linux.intel.com> (raw)
In-Reply-To: <20250319084050.366718-2-pandoh@google.com>

Hi,

On 3/19/25 1:40 AM, Jon Pan-Doh wrote:
> From: Karolina Stolarek <karolina.stolarek@oracle.com>
>
> When reporting an AER error, we check its type multiple times
> to determine the log level for each message. Do this check only
> in the top-level functions (aer_isr_one_error(), pci_print_aer()) and
> propagate the result down the call chain.
>
> Signed-off-by: Karolina Stolarek <karolina.stolarek@oracle.com>
> Signed-off-by: Jon Pan-Doh <pandoh@google.com>
> Reviewed-by: Jon Pan-Doh <pandoh@google.com>
> ---

With my comments fixed, you can add

Reviewed-by: Kuppuswamy Sathyanarayanan 
<sathyanarayanan.kuppuswamy@linux.intel.com>

>   drivers/pci/pci.h      |  2 +-
>   drivers/pci/pcie/aer.c | 37 ++++++++++++++++++++-----------------
>   drivers/pci/pcie/dpc.c |  2 +-
>   3 files changed, 22 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
> index b8911d1e10dc..75985b96ecc1 100644
> --- a/drivers/pci/pci.h
> +++ b/drivers/pci/pci.h
> @@ -551,7 +551,7 @@ struct aer_err_info {
>   };
>   
>   int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
> -void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
> +void aer_print_error(struct pci_dev *dev, struct aer_err_info *info, const char *level);
>   
>   int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
>   		      unsigned int tlp_len, bool flit,
> diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
> index 9cff7069577e..cc9c80cd88f3 100644
> --- a/drivers/pci/pcie/aer.c
> +++ b/drivers/pci/pcie/aer.c
> @@ -670,20 +670,18 @@ static void pci_rootport_aer_stats_incr(struct pci_dev *pdev,
>   }
>   
>   static void __aer_print_error(struct pci_dev *dev,
> -			      struct aer_err_info *info)
> +			      struct aer_err_info *info,
> +			      const char *level)
>   {
>   	const char **strings;
>   	unsigned long status = info->status & ~info->mask;
> -	const char *level, *errmsg;
> +	const char *errmsg;
>   	int i;
>   
> -	if (info->severity == AER_CORRECTABLE) {
> +	if (info->severity == AER_CORRECTABLE)
>   		strings = aer_correctable_error_string;
> -		level = KERN_WARNING;
> -	} else {
> +	else
>   		strings = aer_uncorrectable_error_string;
> -		level = KERN_ERR;
> -	}
>   
>   	for_each_set_bit(i, &status, 32) {
>   		errmsg = strings[i];
> @@ -696,11 +694,11 @@ static void __aer_print_error(struct pci_dev *dev,
>   	pci_dev_aer_stats_incr(dev, info);
>   }
>   
> -void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
> +void aer_print_error(struct pci_dev *dev, struct aer_err_info *info,
> +		     const char *level)
>   {
>   	int layer, agent;
>   	int id = pci_dev_id(dev);
> -	const char *level;
>   
>   	if (!info->status) {
>   		pci_err(dev, "PCIe Bus Error: severity=%s, type=Inaccessible, (Unregistered Agent ID)\n",
> @@ -711,8 +709,6 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>   	layer = AER_GET_LAYER_ERROR(info->severity, info->status);
>   	agent = AER_GET_AGENT(info->severity, info->status);
>   
> -	level = (info->severity == AER_CORRECTABLE) ? KERN_WARNING : KERN_ERR;
> -
>   	aer_printk(level, dev, "PCIe Bus Error: severity=%s, type=%s, (%s)\n",
>   		   aer_error_severity_string[info->severity],
>   		   aer_error_layer[layer], aer_agent_string[agent]);
> @@ -720,7 +716,7 @@ void aer_print_error(struct pci_dev *dev, struct aer_err_info *info)
>   	aer_printk(level, dev, "  device [%04x:%04x] error status/mask=%08x/%08x\n",
>   		   dev->vendor, dev->device, info->status, info->mask);
>   
> -	__aer_print_error(dev, info);
> +	__aer_print_error(dev, info, level);
>   
>   	if (info->tlp_header_valid)
>   		pcie_print_tlp_log(dev, &info->tlp, dev_fmt("  "));
> @@ -765,15 +761,18 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>   {
>   	int layer, agent, tlp_header_valid = 0;
>   	u32 status, mask;
> +	const char *level;
>   	struct aer_err_info info;
>   
>   	if (aer_severity == AER_CORRECTABLE) {
>   		status = aer->cor_status;
>   		mask = aer->cor_mask;
> +		level = KERN_WARNING;
>   	} else {
>   		status = aer->uncor_status;
>   		mask = aer->uncor_mask;
>   		tlp_header_valid = status & AER_LOG_TLP_MASKS;
> +		level = KERN_ERR;
>   	}
>   
>   	layer = AER_GET_LAYER_ERROR(aer_severity, status);
> @@ -786,7 +785,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>   	info.first_error = PCI_ERR_CAP_FEP(aer->cap_control);
>   
>   	pci_err(dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
> -	__aer_print_error(dev, &info);
> +	__aer_print_error(dev, &info, level);
>   	pci_err(dev, "aer_layer=%s, aer_agent=%s\n",
>   		aer_error_layer[layer], aer_agent_string[agent]);
>   
> @@ -1257,14 +1256,15 @@ int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info)
>   	return 1;
>   }
>   
> -static inline void aer_process_err_devices(struct aer_err_info *e_info)
> +static inline void aer_process_err_devices(struct aer_err_info *e_info,
> +					   const char *level)
>   {
>   	int i;
>   
>   	/* Report all before handle them, not to lost records by reset etc. */
>   	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
>   		if (aer_get_device_error_info(e_info->dev[i], e_info))
> -			aer_print_error(e_info->dev[i], e_info);
> +			aer_print_error(e_info->dev[i], e_info, level);
>   	}
>   	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
>   		if (aer_get_device_error_info(e_info->dev[i], e_info))
> @@ -1282,6 +1282,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
>   {
>   	struct pci_dev *pdev = rpc->rpd;
>   	struct aer_err_info e_info;
> +	const char *level;
>   
>   	pci_rootport_aer_stats_incr(pdev, e_src);
>   
> @@ -1292,6 +1293,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
>   	if (e_src->status & PCI_ERR_ROOT_COR_RCV) {
>   		e_info.id = ERR_COR_ID(e_src->id);
>   		e_info.severity = AER_CORRECTABLE;
> +		level = KERN_WARNING;
>   
>   		if (e_src->status & PCI_ERR_ROOT_MULTI_COR_RCV)
>   			e_info.multi_error_valid = 1;
> @@ -1300,11 +1302,12 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
>   		aer_print_port_info(pdev, &e_info);
>   
>   		if (find_source_device(pdev, &e_info))
> -			aer_process_err_devices(&e_info);
> +			aer_process_err_devices(&e_info, level);

Since this path is only taken for correctable error, you can directly use
aer_process_err_devices(&e_info, KERN_WARNING)

>   	}
>   
>   	if (e_src->status & PCI_ERR_ROOT_UNCOR_RCV) {
>   		e_info.id = ERR_UNCOR_ID(e_src->id);
> +		level = KERN_ERR;
>   
>   		if (e_src->status & PCI_ERR_ROOT_FATAL_RCV)
>   			e_info.severity = AER_FATAL;
> @@ -1319,7 +1322,7 @@ static void aer_isr_one_error(struct aer_rpc *rpc,
>   		aer_print_port_info(pdev, &e_info);
>   
>   		if (find_source_device(pdev, &e_info))
> -			aer_process_err_devices(&e_info);
> +			aer_process_err_devices(&e_info, level);

Same as above.

aer_process_err_devices(&e_info, KERN_ERR)

>   	}
>   }
>   
> diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
> index df42f15c9829..9e4c9ac737a7 100644
> --- a/drivers/pci/pcie/dpc.c
> +++ b/drivers/pci/pcie/dpc.c
> @@ -289,7 +289,7 @@ void dpc_process_error(struct pci_dev *pdev)
>   	else if (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR &&
>   		 dpc_get_aer_uncorrect_severity(pdev, &info) &&
>   		 aer_get_device_error_info(pdev, &info)) {
> -		aer_print_error(pdev, &info);
> +		aer_print_error(pdev, &info, KERN_ERR);
>   		pci_aer_clear_nonfatal_status(pdev);
>   		pci_aer_clear_fatal_status(pdev);
>   	}

-- 
Sathyanarayanan Kuppuswamy
Linux Kernel Developer


  reply	other threads:[~2025-03-20  0:07 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-19  8:40 [PATCH v3 0/8] Rate limit AER logs Jon Pan-Doh
2025-03-19  8:40 ` [PATCH v3 1/8] PCI/AER: Check log level once and propagate down Jon Pan-Doh
2025-03-20  0:07   ` Sathyanarayanan Kuppuswamy [this message]
2025-03-19  8:40 ` [PATCH v3 2/8] PCI/AER: Make all pci_print_aer() log levels depend on error type Jon Pan-Doh
2025-03-19  9:52   ` Ilpo Järvinen
2025-03-20  2:39   ` Sathyanarayanan Kuppuswamy
2025-03-20  8:27     ` Jon Pan-Doh
2025-03-20 14:23       ` Sathyanarayanan Kuppuswamy
2025-03-20 19:06         ` Jon Pan-Doh
2025-03-19  8:40 ` [PATCH v3 3/8] PCI/AER: Move AER stat collection out of __aer_print_error() Jon Pan-Doh
2025-03-19 18:19   ` Bjorn Helgaas
2025-03-20  8:27     ` Jon Pan-Doh
2025-03-20  3:22   ` Sathyanarayanan Kuppuswamy
2025-03-20  8:29     ` Jon Pan-Doh
2025-03-19  8:40 ` [PATCH v3 4/8] PCI/AER: Rename struct aer_stats to aer_report Jon Pan-Doh
2025-03-20  3:29   ` Sathyanarayanan Kuppuswamy
2025-03-20  8:28     ` Jon Pan-Doh
2025-03-19  8:40 ` [PATCH v3 5/8] PCI/AER: Introduce ratelimit for error logs Jon Pan-Doh
2025-03-19 18:47   ` Bjorn Helgaas
2025-03-20  8:27     ` Jon Pan-Doh
2025-03-20 18:23       ` Bjorn Helgaas
2025-03-19  8:40 ` [PATCH v3 6/8] PCI/AER: Add ratelimits to PCI AER Documentation Jon Pan-Doh
2025-03-19  8:40 ` [PATCH v3 7/8] PCI/AER: Add sysfs attributes for log ratelimits Jon Pan-Doh
2025-03-19  9:51   ` Ilpo Järvinen
2025-03-20  8:27     ` Jon Pan-Doh
2025-03-19  8:40 ` [PATCH v3 8/8] PCI/AER: Update AER sysfs ABI filename Jon Pan-Doh
2025-03-19 22:29 ` [PATCH v3 0/8] Rate limit AER logs Sathyanarayanan Kuppuswamy
2025-03-19 22:52   ` Jon Pan-Doh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=256a57a6-e2a8-414c-bf48-34b1cdd7ddc8@linux.intel.com \
    --to=sathyanarayanan.kuppuswamy@linux.intel.com \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=Terry.bowman@amd.com \
    --cc=anilagrawal@meta.com \
    --cc=ben.fuller@oracle.com \
    --cc=bhelgaas@google.com \
    --cc=drewwalton@microsoft.com \
    --cc=ilpo.jarvinen@linux.intel.com \
    --cc=karolina.stolarek@oracle.com \
    --cc=linux-pci@vger.kernel.org \
    --cc=lukas@wunner.de \
    --cc=martin.petersen@oracle.com \
    --cc=pandoh@google.com \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.