All of lore.kernel.org
 help / color / mirror / Atom feed
From: Mauro Carvalho Chehab <mchehab@kernel.org>
To: Hanna Hawa <hhhawa@amazon.com>
Cc: <thor.thayer@linux.intel.com>, <bp@alien8.de>,
	<james.morse@arm.com>, <rric@kernel.org>, <morbidrsa@gmail.com>,
	<ralf@linux-mips.org>, <david.daney@cavium.com>,
	<andy.gross@linaro.org>, <david.brown@linaro.org>,
	<ckadabi@codeaurora.org>, <vnkgutta@codeaurora.org>,
	<jglauber@cavium.com>, <khuong@os.amperecomputing.com>,
	<dwmw@amazon.co.uk>, <benh@amazon.com>, <ronenk@amazon.com>,
	<talel@amazon.com>, <jonnyc@amazon.com>, <hanochu@amazon.com>,
	<linux-edac@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	<linux-mips@vger.kernel.org>, <linux-arm-msm@vger.kernel.org>
Subject: Re: [RFC 1/1] edac: Add a counter parameter for edac_device_handle_ue/ce()
Date: Thu, 25 Jul 2019 15:36:58 -0300	[thread overview]
Message-ID: <20190725153658.084ea1aa@coco.lan> (raw)
In-Reply-To: <1563187987-5847-1-git-send-email-hhhawa@amazon.com>

Em Mon, 15 Jul 2019 13:53:07 +0300
Hanna Hawa <hhhawa@amazon.com> escreveu:

> Add a counter parameter in order to avoid losing errors count for edac
> device, the error count reports the number of errors reported by an edac
> device similar to the way MC_EDAC do.
> 
> Signed-off-by: Hanna Hawa <hhhawa@amazon.com>
> ---
>  drivers/edac/altera_edac.c      | 20 ++++++++++++--------
>  drivers/edac/amd8111_edac.c     |  6 +++---
>  drivers/edac/cpc925_edac.c      |  4 ++--
>  drivers/edac/edac_device.c      | 18 ++++++++++--------
>  drivers/edac/edac_device.h      |  8 ++++++--
>  drivers/edac/highbank_l2_edac.c |  4 ++--
>  drivers/edac/mpc85xx_edac.c     |  4 ++--
>  drivers/edac/mv64x60_edac.c     |  4 ++--
>  drivers/edac/octeon_edac-l2c.c  | 20 ++++++++++----------
>  drivers/edac/octeon_edac-pc.c   |  6 +++---
>  drivers/edac/qcom_edac.c        |  8 ++++----
>  drivers/edac/thunderx_edac.c    | 10 +++++-----
>  drivers/edac/xgene_edac.c       | 26 +++++++++++++-------------
>  13 files changed, 74 insertions(+), 64 deletions(-)
> 
> diff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c
> index 8816f74..747dd43 100644
> --- a/drivers/edac/altera_edac.c
> +++ b/drivers/edac/altera_edac.c
> @@ -616,12 +616,12 @@ static irqreturn_t altr_edac_device_handler(int irq, void *dev_id)
>  	if (irq == drvdata->sb_irq) {
>  		if (priv->ce_clear_mask)
>  			writel(priv->ce_clear_mask, drvdata->base);
> -		edac_device_handle_ce(dci, 0, 0, drvdata->edac_dev_name);
> +		edac_device_handle_ce(dci, 1, 0, 0, drvdata->edac_dev_name);
>  		ret_value = IRQ_HANDLED;
>  	} else if (irq == drvdata->db_irq) {
>  		if (priv->ue_clear_mask)
>  			writel(priv->ue_clear_mask, drvdata->base);
> -		edac_device_handle_ue(dci, 0, 0, drvdata->edac_dev_name);
> +		edac_device_handle_ue(dci, 1, 0, 0, drvdata->edac_dev_name);
>  		panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
>  		ret_value = IRQ_HANDLED;
>  	} else {
> @@ -919,13 +919,15 @@ static irqreturn_t __maybe_unused altr_edac_a10_ecc_irq(int irq, void *dev_id)
>  	if (irq == dci->sb_irq) {
>  		writel(ALTR_A10_ECC_SERRPENA,
>  		       base + ALTR_A10_ECC_INTSTAT_OFST);
> -		edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
> +		edac_device_handle_ce(dci->edac_dev, 1, 0, 0,
> +				      dci->edac_dev_name);
>  
>  		return IRQ_HANDLED;
>  	} else if (irq == dci->db_irq) {
>  		writel(ALTR_A10_ECC_DERRPENA,
>  		       base + ALTR_A10_ECC_INTSTAT_OFST);
> -		edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
> +		edac_device_handle_ue(dci->edac_dev, 1, 0, 0,
> +				      dci->edac_dev_name);
>  		if (dci->data->panic)
>  			panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
>  
> @@ -1308,14 +1310,16 @@ static irqreturn_t altr_edac_a10_l2_irq(int irq, void *dev_id)
>  		regmap_write(dci->edac->ecc_mgr_map,
>  			     A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
>  			     A10_SYSGMR_MPU_CLEAR_L2_ECC_SB);
> -		edac_device_handle_ce(dci->edac_dev, 0, 0, dci->edac_dev_name);
> +		edac_device_handle_ce(dci->edac_dev, 1, 0, 0,
> +				      dci->edac_dev_name);
>  
>  		return IRQ_HANDLED;
>  	} else if (irq == dci->db_irq) {
>  		regmap_write(dci->edac->ecc_mgr_map,
>  			     A10_SYSGMR_MPU_CLEAR_L2_ECC_OFST,
>  			     A10_SYSGMR_MPU_CLEAR_L2_ECC_MB);
> -		edac_device_handle_ue(dci->edac_dev, 0, 0, dci->edac_dev_name);
> +		edac_device_handle_ue(dci->edac_dev, 1, 0, 0,
> +				      dci->edac_dev_name);
>  		panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n");
>  
>  		return IRQ_HANDLED;
> @@ -1652,12 +1656,12 @@ static irqreturn_t altr_edac_a10_ecc_irq_portb(int irq, void *dev_id)
>  	if (irq == ad->sb_irq) {
>  		writel(priv->ce_clear_mask,
>  		       base + ALTR_A10_ECC_INTSTAT_OFST);
> -		edac_device_handle_ce(ad->edac_dev, 0, 0, ad->edac_dev_name);
> +		edac_device_handle_ce(ad->edac_dev, 1, 0, 0, ad->edac_dev_name);
>  		return IRQ_HANDLED;
>  	} else if (irq == ad->db_irq) {
>  		writel(priv->ue_clear_mask,
>  		       base + ALTR_A10_ECC_INTSTAT_OFST);
> -		edac_device_handle_ue(ad->edac_dev, 0, 0, ad->edac_dev_name);
> +		edac_device_handle_ue(ad->edac_dev, 1, 0, 0, ad->edac_dev_name);
>  		return IRQ_HANDLED;
>  	}
>  
> diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c
> index b5786cf..e595fab 100644
> --- a/drivers/edac/amd8111_edac.c
> +++ b/drivers/edac/amd8111_edac.c
> @@ -303,7 +303,7 @@ static void amd8111_lpc_bridge_check(struct edac_device_ctl_info *edac_dev)
>  		val8 |= IO_CTRL_1_CLEAR_MASK;
>  		edac_pci_write_byte(dev, REG_IO_CTRL_1, val8);
>  
> -		edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
> +		edac_device_handle_ue(edac_dev, 1, 0, 0, edac_dev->ctl_name);
>  	}
>  
>  	if (at_compat_reg_broken == 0) {
> @@ -315,8 +315,8 @@ static void amd8111_lpc_bridge_check(struct edac_device_ctl_info *edac_dev)
>  			out8 |= AT_COMPAT_CLRIOCHK;
>  		if (out8 > 0) {
>  			__do_outb(out8, REG_AT_COMPAT);
> -			edac_device_handle_ue(edac_dev, 0, 0,
> -						edac_dev->ctl_name);
> +			edac_device_handle_ue(edac_dev, 1, 0, 0,
> +					      edac_dev->ctl_name);
>  		}
>  	}
>  }
> diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
> index 3c0881a..eb74865 100644
> --- a/drivers/edac/cpc925_edac.c
> +++ b/drivers/edac/cpc925_edac.c
> @@ -682,7 +682,7 @@ static void cpc925_cpu_check(struct edac_device_ctl_info *edac_dev)
>  	cpc925_printk(KERN_INFO, "APIMASK		0x%08x\n", apimask);
>  	cpc925_printk(KERN_INFO, "APIEXCP		0x%08x\n", apiexcp);
>  
> -	edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name);
> +	edac_device_handle_ue(edac_dev, 1, 0, 0, edac_dev->ctl_name);
>  }
>  
>  /******************** HT Link err device****************************/
> @@ -756,7 +756,7 @@ static void cpc925_htlink_check(struct edac_device_ctl_info *edac_dev)
>  		__raw_writel(HT_LINKERR_DETECTED,
>  				dev_info->vbase + REG_LINKERR_OFFSET);
>  
> -	edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name);
> +	edac_device_handle_ce(edac_dev, 1, 0, 0, edac_dev->ctl_name);
>  }
>  
>  static struct cpc925_dev_info cpc925_devs[] = {
> diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
> index 65cf2b9..d1de296 100644
> --- a/drivers/edac/edac_device.c
> +++ b/drivers/edac/edac_device.c
> @@ -556,7 +556,8 @@ static inline int edac_device_get_panic_on_ue(struct edac_device_ctl_info
>  }
>  
>  void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
> -			int inst_nr, int block_nr, const char *msg)
> +			   u16 error_count, int inst_nr, int block_nr,
> +			   const char *msg)
>  {
>  	struct edac_device_instance *instance;
>  	struct edac_device_block *block = NULL;
> @@ -582,12 +583,12 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
>  
>  	if (instance->nr_blocks > 0) {
>  		block = instance->blocks + block_nr;
> -		block->counters.ce_count++;
> +		block->counters.ce_count += error_count;
>  	}
>  
>  	/* Propagate the count up the 'totals' tree */
> -	instance->counters.ce_count++;
> -	edac_dev->counters.ce_count++;
> +	instance->counters.ce_count += error_count;
> +	edac_dev->counters.ce_count += error_count;
>  
>  	if (edac_device_get_log_ce(edac_dev))
>  		edac_device_printk(edac_dev, KERN_WARNING,
> @@ -598,7 +599,8 @@ void edac_device_handle_ce(struct edac_device_ctl_info *edac_dev,
>  EXPORT_SYMBOL_GPL(edac_device_handle_ce);
>  
>  void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
> -			int inst_nr, int block_nr, const char *msg)
> +			   u16 error_count, int inst_nr, int block_nr,
> +			   const char *msg)
>  {
>  	struct edac_device_instance *instance;
>  	struct edac_device_block *block = NULL;
> @@ -624,12 +626,12 @@ void edac_device_handle_ue(struct edac_device_ctl_info *edac_dev,
>  
>  	if (instance->nr_blocks > 0) {
>  		block = instance->blocks + block_nr;
> -		block->counters.ue_count++;
> +		block->counters.ue_count += error_count;
>  	}
>  
>  	/* Propagate the count up the 'totals' tree */
> -	instance->counters.ue_count++;
> -	edac_dev->counters.ue_count++;
> +	instance->counters.ue_count += error_count;
> +	edac_dev->counters.ue_count += error_count;

Patch itself looks a good idea, but maybe it should rise a WARN()
if error_count == 0.

That applies for both CE and UE error logic.

Thanks,
Mauro

  parent reply	other threads:[~2019-07-25 18:37 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-07-15 10:53 [RFC 1/1] edac: Add a counter parameter for edac_device_handle_ue/ce() Hanna Hawa
2019-07-17 12:06 ` Jan Glauber
2019-07-17 14:45   ` Hawa, Hanna
2019-07-25 18:36 ` Mauro Carvalho Chehab [this message]
2019-07-28  9:34   ` [UNVERIFIED SENDER] " Hawa, Hanna
2019-08-01 11:35 ` Robert Richter
2019-08-01 12:29   ` Hawa, Hanna
2019-08-01 14:17     ` Robert Richter
2019-08-01 14:30       ` Hawa, Hanna

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190725153658.084ea1aa@coco.lan \
    --to=mchehab@kernel.org \
    --cc=andy.gross@linaro.org \
    --cc=benh@amazon.com \
    --cc=bp@alien8.de \
    --cc=ckadabi@codeaurora.org \
    --cc=david.brown@linaro.org \
    --cc=david.daney@cavium.com \
    --cc=dwmw@amazon.co.uk \
    --cc=hanochu@amazon.com \
    --cc=hhhawa@amazon.com \
    --cc=james.morse@arm.com \
    --cc=jglauber@cavium.com \
    --cc=jonnyc@amazon.com \
    --cc=khuong@os.amperecomputing.com \
    --cc=linux-arm-msm@vger.kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mips@vger.kernel.org \
    --cc=morbidrsa@gmail.com \
    --cc=ralf@linux-mips.org \
    --cc=ronenk@amazon.com \
    --cc=rric@kernel.org \
    --cc=talel@amazon.com \
    --cc=thor.thayer@linux.intel.com \
    --cc=vnkgutta@codeaurora.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.