Linux PCI subsystem development
 help / color / mirror / Atom feed
From: Bjorn Helgaas <helgaas@kernel.org>
To: Jon Pan-Doh <pandoh@google.com>
Cc: "Bjorn Helgaas" <bhelgaas@google.com>,
	"Karolina Stolarek" <karolina.stolarek@oracle.com>,
	linux-pci@vger.kernel.org,
	"Martin Petersen" <martin.petersen@oracle.com>,
	"Ben Fuller" <ben.fuller@oracle.com>,
	"Drew Walton" <drewwalton@microsoft.com>,
	"Anil Agrawal" <anilagrawal@meta.com>,
	"Tony Luck" <tony.luck@intel.com>,
	"Ilpo Järvinen" <ilpo.jarvinen@linux.intel.com>,
	"Sathyanarayanan Kuppuswamy"
	<sathyanarayanan.kuppuswamy@linux.intel.com>,
	"Lukas Wunner" <lukas@wunner.de>,
	"Jonathan Cameron" <Jonathan.Cameron@huawei.com>,
	"Sargun Dhillon" <sargun@meta.com>,
	"Paul E . McKenney" <paulmck@kernel.org>
Subject: Re: [PATCH v5 6/8] PCI/AER: Introduce ratelimit for error logs
Date: Thu, 24 Apr 2025 15:31:19 -0500	[thread overview]
Message-ID: <20250424203119.GA497240@bhelgaas> (raw)
In-Reply-To: <20250321015806.954866-7-pandoh@google.com>

On Thu, Mar 20, 2025 at 06:58:04PM -0700, Jon Pan-Doh wrote:
> Spammy devices can flood kernel logs with AER errors and slow/stall
> execution. Add per-device ratelimits for AER correctable and uncorrectable
> errors that use the kernel defaults (10 per 5s).

Sorry for the long delay getting back to this.  Obviously this series
will need to be rebased to v6.15-rc1.

> +++ b/drivers/pci/pci.h
> @@ -533,6 +533,7 @@ static inline bool pci_dev_test_and_set_removed(struct pci_dev *dev)
>  
>  struct aer_err_info {
>  	struct pci_dev *dev[AER_MAX_MULTI_ERR_DEVICES];
> +	bool ratelimited[AER_MAX_MULTI_ERR_DEVICES];

What would you think about this with related changes below:

  int ratelimit[AER_MAX_MULTI_ERR_DEVICES];
  int combined_ratelimit;

> +static bool aer_ratelimited(struct pci_dev *dev, unsigned int severity)
> +{
> +	struct ratelimit_state *ratelimit;
> +
> +	if (severity == AER_CORRECTABLE)
> +		ratelimit = &dev->aer_report->cor_log_ratelimit;
> +	else
> +		ratelimit = &dev->aer_report->uncor_log_ratelimit;
> +
> +	return !__ratelimit(ratelimit);

IMO this will fit better with other ratelimit users if we return int
with:

  return __ratelimit(ratelimit);

>  void aer_print_error(struct pci_dev *dev, struct aer_err_info *info,
> -		     const char *level)
> +		     const char *level, bool ratelimited)
>  {
>  	int layer, agent;
>  	int id = pci_dev_id(dev);
>  
> +	trace_aer_event(dev_name(&dev->dev), (info->status & ~info->mask),
> +			info->severity, info->tlp_header_valid, &info->tlp);

Maybe move the trace_aer_event() call up to aer_process_err_devices(),
where it would be next to the pci_dev_aer_stats_incr()?  Then
aer_print_error() would be pure printing.

The e_info->ratelimit[i] test could go in aer_process_err_devices() as
well, so you wouldn't have to pass it in to aer_print_error().

>  static void aer_print_rp_info(struct pci_dev *rp, struct aer_err_info *info)
>  {
>  	u8 bus = info->id >> 8;
>  	u8 devfn = info->id & 0xff;
> +	struct pci_dev *dev;
> +	bool ratelimited = false;
> +	int i;
>  
> -	pci_info(rp, "%s%s error message received from %04x:%02x:%02x.%d\n",
> -		 info->multi_error_valid ? "Multiple " : "",
> -		 aer_error_severity_string[info->severity],
> -		 pci_domain_nr(rp->bus), bus, PCI_SLOT(devfn),
> -		 PCI_FUNC(devfn));
> +	/* extract endpoint device ratelimit */
> +	for (i = 0; i < info->error_dev_num; i++) {
> +		dev = info->dev[i];
> +		if (info->id == pci_dev_id(dev)) {
> +			ratelimited = info->ratelimited[i];
> +			break;
> +		}
> +	}

If add_error_device() sets info->combined_ratelimit (as below), you
could drop the loop above and do this:

  if (info->combined_ratelimit)
    pci_info(rp, "...");

The combined_ratelimit check could go up in aer_isr_one_error() and
this function would also be pure printing.

I guess this and aer_print_error() could go either way: the ratelimit
check inside the function or in the caller.  If you do the check
inside aer_print_error(), you have to pass in ratelimit because you
don't know which element of the info->ratelimit[] table to look at,
which I guess is an argument for doing the check in the callers.

> +	if (!ratelimited)
> +		pci_info(rp, "%s%s error message received from %04x:%02x:%02x.%d\n",
> +			 info->multi_error_valid ? "Multiple " : "",
> +			 aer_error_severity_string[info->severity],
> +			 pci_domain_nr(rp->bus), bus, PCI_SLOT(devfn),
> +			 PCI_FUNC(devfn));
>  }

>  
>  #ifdef CONFIG_ACPI_APEI_PCIEAER
> @@ -784,6 +822,12 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>  
>  	pci_dev_aer_stats_incr(dev, &info);
>  
> +	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
> +			aer_severity, tlp_header_valid, &aer->header_log);
> +
> +	if (aer_ratelimited(dev, aer_severity))
> +		return;
> +
>  	aer_printk(level, dev, "aer_status: 0x%08x, aer_mask: 0x%08x\n", status, mask);
>  	__aer_print_error(dev, &info, level);
>  	aer_printk(level, dev, "aer_layer=%s, aer_agent=%s\n",
> @@ -795,9 +839,6 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
>  
>  	if (tlp_header_valid)
>  		pcie_print_tlp_log(dev, &aer->header_log, dev_fmt("  "));
> -
> -	trace_aer_event(dev_name(&dev->dev), (status & ~mask),
> -			aer_severity, tlp_header_valid, &aer->header_log);
>  }
>  EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
>  
> @@ -808,8 +849,12 @@ EXPORT_SYMBOL_NS_GPL(pci_print_aer, "CXL");
>   */
>  static int add_error_device(struct aer_err_info *e_info, struct pci_dev *dev)
>  {
> -	if (e_info->error_dev_num < AER_MAX_MULTI_ERR_DEVICES) {
> -		e_info->dev[e_info->error_dev_num] = pci_dev_get(dev);
> +	int dev_idx = e_info->error_dev_num;
> +	unsigned int severity = e_info->severity;
> +
> +	if (dev_idx < AER_MAX_MULTI_ERR_DEVICES) {
> +		e_info->dev[dev_idx] = pci_dev_get(dev);
> +		e_info->ratelimited[dev_idx] = aer_ratelimited(dev, severity);

If we have info to print for this device (ratelimit==1), we should
also print the Root Port header.  I think this would be simpler than
combining the device ratelimits in aer_print_rp_info():

  int ratelimit = aer_ratelimit(dev, severity);
  e_info->ratelimit[dev_idx] = ratelimit;
  e_info->combined_ratelimit |= ratelimit;

>  		e_info->error_dev_num++;
>  		return 0;
>  	}
> @@ -1265,7 +1310,8 @@ static inline void aer_process_err_devices(struct aer_err_info *e_info,
>  	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
>  		if (aer_get_device_error_info(e_info->dev[i], e_info)) {
>  			pci_dev_aer_stats_incr(e_info->dev[i], e_info);
> -			aer_print_error(e_info->dev[i], e_info, level);
> +			aer_print_error(e_info->dev[i], e_info, level,
> +					e_info->ratelimited[i]);
>  		}
>  	}
>  	for (i = 0; i < e_info->error_dev_num && e_info->dev[i]; i++) {
> @@ -1299,10 +1345,11 @@ static void aer_isr_one_error(struct aer_rpc *rpc,

Tangent: I'm a little queasy about how e_info is an uninitialized
stack variable in aer_isr_one_error().  There are hints that we know
about this, e.g., the "Must reset in this function" comment in
find_source_device(), but I would feel a lot better about this if we
just cleared it out.

>  			e_info.multi_error_valid = 1;
>  		else
>  			e_info.multi_error_valid = 0;
> -		aer_print_rp_info(pdev, &e_info);
>  
> -		if (find_source_device(pdev, &e_info))
> +		if (find_source_device(pdev, &e_info)) {
> +			aer_print_rp_info(pdev, &e_info);
>  			aer_process_err_devices(&e_info, KERN_WARNING);
> +		}

Previously we always printed the RP info ("error message received
from").  Now we only print the RP info if we found a downstream device
with error info.

I think we should print the RP info even if we can't find the
downstream device (maybe it's broken, was yanked out, powered off,
etc), e.g., maybe something like this:

    if (find_source_device(pdev, &e_info)) {
        if (e_info.combined_ratelimit)
            aer_print_rp_info(pdev, &e_info);
        aer_process_err_devices(&e_info, KERN_WARNING);
    } else {
        if (aer_ratelimit(pdev, AER_CORRECTABLE))
            aer_print_rp_info(pdev, &e_info);
    }

The idea is:

  - we print the RP info if any downstream device info will be
    printed, and the downstream info is ratelimited based on the
    device it came from, and

  - if we don't find downstream error info, we ratelimit printing the
    RP info based on the RP itself.

Bjorn

  parent reply	other threads:[~2025-04-24 20:31 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-03-21  1:57 [PATCH v5 0/8] Rate limit AER logs Jon Pan-Doh
2025-03-21  1:57 ` [PATCH v5 1/8] PCI/AER: Check log level once and propagate down Jon Pan-Doh
2025-05-01 21:43   ` Bjorn Helgaas
2025-05-05  9:30     ` Karolina Stolarek
2025-05-05 17:43       ` Bjorn Helgaas
2025-05-08 15:07         ` Karolina Stolarek
2025-03-21  1:58 ` [PATCH v5 2/8] PCI/AER: Make all pci_print_aer() log levels depend on error type Jon Pan-Doh
2025-03-21  1:58 ` [PATCH v5 3/8] PCI/AER: Move AER stat collection out of __aer_print_error() Jon Pan-Doh
2025-03-21  1:58 ` [PATCH v5 4/8] PCI/AER: Rename aer_print_port_info() to aer_print_rp_info() Jon Pan-Doh
2025-03-21 13:39   ` Karolina Stolarek
2025-03-21 19:26     ` Jon Pan-Doh
2025-03-21  1:58 ` [PATCH v5 5/8] PCI/AER: Rename struct aer_stats to aer_report Jon Pan-Doh
2025-03-21 22:01   ` Bjorn Helgaas
2025-03-21 22:15     ` Jon Pan-Doh
2025-03-21 22:30       ` Paul E. McKenney
2025-03-21 22:16     ` Paul E. McKenney
2025-03-21 22:39       ` Bjorn Helgaas
2025-03-21 22:47         ` Paul E. McKenney
2025-05-01 22:02         ` Bjorn Helgaas
2025-05-02  2:16           ` Paul E. McKenney
2025-03-21  1:58 ` [PATCH v5 6/8] PCI/AER: Introduce ratelimit for error logs Jon Pan-Doh
2025-03-21 13:46   ` Karolina Stolarek
2025-03-21 18:41     ` Jon Pan-Doh
2025-04-04  9:32       ` Karolina Stolarek
2025-03-25 17:17   ` Paul E. McKenney
2025-03-27 22:49     ` Jon Pan-Doh
2025-04-03 19:02       ` Paul E. McKenney
2025-03-31 18:48   ` Bjorn Helgaas
2025-04-01  0:30     ` Jon Pan-Doh
2025-04-01 18:02       ` Bjorn Helgaas
2025-04-24 20:31   ` Bjorn Helgaas [this message]
2025-03-21  1:58 ` [PATCH v5 7/8] PCI/AER: Add ratelimits to PCI AER Documentation Jon Pan-Doh
2025-03-21  1:58 ` [PATCH v5 8/8] PCI/AER: Add sysfs attributes for log ratelimits Jon Pan-Doh
2025-03-23 12:20   ` Krzysztof Wilczyński
2025-03-27 22:50     ` Jon Pan-Doh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250424203119.GA497240@bhelgaas \
    --to=helgaas@kernel.org \
    --cc=Jonathan.Cameron@huawei.com \
    --cc=anilagrawal@meta.com \
    --cc=ben.fuller@oracle.com \
    --cc=bhelgaas@google.com \
    --cc=drewwalton@microsoft.com \
    --cc=ilpo.jarvinen@linux.intel.com \
    --cc=karolina.stolarek@oracle.com \
    --cc=linux-pci@vger.kernel.org \
    --cc=lukas@wunner.de \
    --cc=martin.petersen@oracle.com \
    --cc=pandoh@google.com \
    --cc=paulmck@kernel.org \
    --cc=sargun@meta.com \
    --cc=sathyanarayanan.kuppuswamy@linux.intel.com \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.