public inbox for linux-edac@vger.kernel.org
 help / color / mirror / Atom feed
From: Serge Semin <fancer.lancer@gmail.com>
To: Michal Simek <michal.simek@amd.com>,
	Alexander Stein <alexander.stein@ew.tq-group.com>,
	Borislav Petkov <bp@alien8.de>, Tony Luck <tony.luck@intel.com>,
	James Morse <james.morse@arm.com>,
	Mauro Carvalho Chehab <mchehab@kernel.org>,
	Robert Richter <rric@kernel.org>
Cc: Punnaiah Choudary Kalluri <punnaiah.choudary.kalluri@xilinx.com>,
	Dinh Nguyen <dinguyen@kernel.org>, Arnd Bergmann <arnd@arndb.de>,
	Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	linux-arm-kernel@lists.infradead.org, linux-edac@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: [PATCH v4 16/18] EDAC/synopsys: Add erroneous page-frame/offset reporting
Date: Wed, 20 Sep 2023 22:46:53 +0300	[thread overview]
Message-ID: <20230920194656.30879-1-fancer.lancer@gmail.com> (raw)
In-Reply-To: <20230920192806.29960-1-fancer.lancer@gmail.com>

A full System/SDRAM address translation interface is now available. Use it
to determine the system address causing the ECC faults: add the System->
Application->HIF->SDRAM address translation procedures based on the DW
uMCTL2 DDRC DQ-bus config and HIF/SDRAM mapping table retrieved on the
device probe stage; for the sake of simplification convert the
snps_ecc_error_info structure to contain a snps_sdram_addr structure
instance, since the erroneous SDRAM address will now participate in the
address translation chain; issue the SDRAM->System address translation
before passing the latter to the edac_mc_handle_error() method.

Note the ECC address rank needs to be retrieved now too in order to
determine a correct system address. But the rank won't be passed to the
MCI core for now since the MCI device is registered with a single ranked
layer 0.

Signed-off-by: Serge Semin <fancer.lancer@gmail.com>
---
 drivers/edac/synopsys_edac.c | 143 +++++++++++++++++++++++++++++------
 1 file changed, 118 insertions(+), 25 deletions(-)

diff --git a/drivers/edac/synopsys_edac.c b/drivers/edac/synopsys_edac.c
index 6b8949c66eef..5384e93ec58c 100644
--- a/drivers/edac/synopsys_edac.c
+++ b/drivers/edac/synopsys_edac.c
@@ -12,6 +12,7 @@
 #include <linux/fs.h>
 #include <linux/log2.h>
 #include <linux/module.h>
+#include <linux/pfn.h>
 #include <linux/platform_device.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
@@ -346,20 +347,14 @@ struct snps_sdram_addr {
 
 /**
  * struct snps_ecc_error_info - ECC error log information.
- * @row:	Row number.
- * @col:	Column number.
- * @bank:	Bank number.
- * @bankgrp:	Bank group number.
+ * @sdram:	SDRAM address.
  * @syndrome:	Error syndrome.
  * @bitpos:	Bit position.
  * @data:	Data causing the error.
  * @ecc:	Data ECC.
  */
 struct snps_ecc_error_info {
-	u32 row;
-	u32 col;
-	u32 bank;
-	u32 bankgrp;
+	struct snps_sdram_addr sdram;
 	u32 syndrome;
 	u32 bitpos;
 	u64 data;
@@ -420,6 +415,21 @@ static void snps_map_app_to_hif(struct snps_edac_priv *priv,
 	*hif = app >> priv->info.dq_width;
 }
 
+/**
+ * snps_map_hif_to_app - Map HIF address to Application address.
+ * @priv:	DDR memory controller private instance data.
+ * @hif:	HIF address (source).
+ * @app:	Application address (destination).
+ *
+ * Backward HIF-to-App translation is just the opposite DQ-width-based
+ * shift operation.
+ */
+static void snps_map_hif_to_app(struct snps_edac_priv *priv,
+				u64 hif, u64 *app)
+{
+	*app = hif << priv->info.dq_width;
+}
+
 /**
  * snps_map_hif_to_sdram - Map HIF address to SDRAM address.
  * @priv:	DDR memory controller private instance data.
@@ -471,6 +481,58 @@ static void snps_map_hif_to_sdram(struct snps_edac_priv *priv,
 	}
 }
 
+/**
+ * snps_map_sdram_to_hif - Map SDRAM address to HIF address.
+ * @priv:	DDR memory controller private instance data.
+ * @sdram:	SDRAM address (source).
+ * @hif:	HIF address (destination).
+ *
+ * SDRAM-HIF address mapping is similar to the HIF-SDRAM mapping procedure, but
+ * we'll traverse each SDRAM rank/bank/column/row bit.
+ *
+ * Note the unmapped bits of the SDRAM address components will be just
+ * ignored. So make sure the source address is valid.
+ */
+static void snps_map_sdram_to_hif(struct snps_edac_priv *priv,
+				  struct snps_sdram_addr *sdram, u64 *hif)
+{
+	struct snps_hif_sdram_map *map = &priv->hif_sdram_map;
+	unsigned long addr;
+	int i;
+
+	*hif = 0;
+
+	addr = sdram->row;
+	for_each_set_bit(i, &addr, DDR_MAX_ROW_WIDTH) {
+		if (map->row[i] != DDR_ADDRMAP_UNUSED)
+			*hif |= BIT_ULL(map->row[i]);
+	}
+
+	addr = sdram->col;
+	for_each_set_bit(i, &addr, DDR_MAX_COL_WIDTH) {
+		if (map->col[i] != DDR_ADDRMAP_UNUSED)
+			*hif |= BIT_ULL(map->col[i]);
+	}
+
+	addr = sdram->bank;
+	for_each_set_bit(i, &addr, DDR_MAX_BANK_WIDTH) {
+		if (map->bank[i] != DDR_ADDRMAP_UNUSED)
+			*hif |= BIT_ULL(map->bank[i]);
+	}
+
+	addr = sdram->bankgrp;
+	for_each_set_bit(i, &addr, DDR_MAX_BANKGRP_WIDTH) {
+		if (map->bankgrp[i] != DDR_ADDRMAP_UNUSED)
+			*hif |= BIT_ULL(map->bankgrp[i]);
+	}
+
+	addr = sdram->rank;
+	for_each_set_bit(i, &addr, DDR_MAX_RANK_WIDTH) {
+		if (map->rank[i] != DDR_ADDRMAP_UNUSED)
+			*hif |= BIT_ULL(map->rank[i]);
+	}
+}
+
 /**
  * snps_map_sys_to_sdram - Map System address to SDRAM address.
  * @priv:	DDR memory controller private instance data.
@@ -492,6 +554,27 @@ static void snps_map_sys_to_sdram(struct snps_edac_priv *priv,
 	snps_map_hif_to_sdram(priv, hif, sdram);
 }
 
+/**
+ * snps_map_sdram_to_sys - Map SDRAM address to System address.
+ * @priv:	DDR memory controller private instance data.
+ * @sdram:	SDRAM address (source).
+ * @sys:	System address (destination).
+ *
+ * Perform a full mapping of the SDRAM address (row/column/bank/etc) to
+ * the system address specific to the controller system bus ports.
+ */
+static void snps_map_sdram_to_sys(struct snps_edac_priv *priv,
+				  struct snps_sdram_addr *sdram, dma_addr_t *sys)
+{
+	u64 app, hif;
+
+	snps_map_sdram_to_hif(priv, sdram, &hif);
+
+	snps_map_hif_to_app(priv, hif, &app);
+
+	*sys = app;
+}
+
 /**
  * snps_get_bitpos - Get DQ-bus corrected bit position.
  * @syndrome:	Error syndrome.
@@ -544,12 +627,13 @@ static int snps_get_error_info(struct snps_edac_priv *priv)
 	p->ceinfo.bitpos = snps_get_bitpos(p->ceinfo.syndrome, priv->info.dq_width);
 
 	regval = readl(base + ECC_CEADDR0_OFST);
-	p->ceinfo.row = FIELD_GET(ECC_CEADDR0_ROW_MASK, regval);
+	p->ceinfo.sdram.rank = FIELD_GET(ECC_CEADDR0_RANK_MASK, regval);
+	p->ceinfo.sdram.row = FIELD_GET(ECC_CEADDR0_ROW_MASK, regval);
 
 	regval = readl(base + ECC_CEADDR1_OFST);
-	p->ceinfo.bank = FIELD_GET(ECC_CEADDR1_BANK_MASK, regval);
-	p->ceinfo.bankgrp = FIELD_GET(ECC_CEADDR1_BANKGRP_MASK, regval);
-	p->ceinfo.col = FIELD_GET(ECC_CEADDR1_COL_MASK, regval);
+	p->ceinfo.sdram.bankgrp = FIELD_GET(ECC_CEADDR1_BANKGRP_MASK, regval);
+	p->ceinfo.sdram.bank = FIELD_GET(ECC_CEADDR1_BANK_MASK, regval);
+	p->ceinfo.sdram.col = FIELD_GET(ECC_CEADDR1_COL_MASK, regval);
 
 	p->ceinfo.data = readl(base + ECC_CSYND0_OFST);
 	if (priv->info.dq_width == SNPS_DQ_64)
@@ -562,12 +646,13 @@ static int snps_get_error_info(struct snps_edac_priv *priv)
 		goto out;
 
 	regval = readl(base + ECC_UEADDR0_OFST);
-	p->ueinfo.row = FIELD_GET(ECC_CEADDR0_ROW_MASK, regval);
+	p->ueinfo.sdram.rank = FIELD_GET(ECC_CEADDR0_RANK_MASK, regval);
+	p->ueinfo.sdram.row = FIELD_GET(ECC_CEADDR0_ROW_MASK, regval);
 
 	regval = readl(base + ECC_UEADDR1_OFST);
-	p->ueinfo.bankgrp = FIELD_GET(ECC_CEADDR1_BANKGRP_MASK, regval);
-	p->ueinfo.bank = FIELD_GET(ECC_CEADDR1_BANK_MASK, regval);
-	p->ueinfo.col = FIELD_GET(ECC_CEADDR1_COL_MASK, regval);
+	p->ueinfo.sdram.bankgrp = FIELD_GET(ECC_CEADDR1_BANKGRP_MASK, regval);
+	p->ueinfo.sdram.bank = FIELD_GET(ECC_CEADDR1_BANK_MASK, regval);
+	p->ueinfo.sdram.col = FIELD_GET(ECC_CEADDR1_COL_MASK, regval);
 
 	p->ueinfo.data = readl(base + ECC_UESYND0_OFST);
 	if (priv->info.dq_width == SNPS_DQ_64)
@@ -599,31 +684,39 @@ static void snps_handle_error(struct mem_ctl_info *mci, struct snps_ecc_status *
 {
 	struct snps_edac_priv *priv = mci->pvt_info;
 	struct snps_ecc_error_info *pinf;
+	dma_addr_t sys;
 
 	if (p->ce_cnt) {
 		pinf = &p->ceinfo;
 
+		snps_map_sdram_to_sys(priv, &pinf->sdram, &sys);
+
 		snprintf(priv->message, SNPS_EDAC_MSG_SIZE,
-			 "Row %d Col %d Bank %d Bank Group %d Bit %d Data 0x%08llx:0x%02x",
-			 pinf->row, pinf->col, pinf->bank, pinf->bankgrp,
+			 "Row %hu Col %hu Bank %hhu Bank Group %hhu Rank %hhu Bit %d Data 0x%08llx:0x%02x",
+			 pinf->sdram.row, pinf->sdram.col, pinf->sdram.bank,
+			 pinf->sdram.bankgrp, pinf->sdram.rank,
 			 pinf->bitpos, pinf->data, pinf->ecc);
 
-		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
-				     p->ce_cnt, 0, 0, pinf->syndrome, 0, 0, -1,
+		edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, p->ce_cnt,
+				     PHYS_PFN(sys), offset_in_page(sys),
+				     pinf->syndrome, 0, 0, -1,
 				     priv->message, "");
 	}
 
 	if (p->ue_cnt) {
 		pinf = &p->ueinfo;
 
+		snps_map_sdram_to_sys(priv, &pinf->sdram, &sys);
+
 		snprintf(priv->message, SNPS_EDAC_MSG_SIZE,
-			 "Row %d Col %d Bank %d Bank Group %d Data 0x%08llx:0x%02x",
-			 pinf->row, pinf->col, pinf->bank, pinf->bankgrp,
+			 "Row %hu Col %hu Bank %hhu Bank Group %hhu Rank %hhu Data 0x%08llx:0x%02x",
+			 pinf->sdram.row, pinf->sdram.col, pinf->sdram.bank,
+			 pinf->sdram.bankgrp, pinf->sdram.rank,
 			 pinf->data, pinf->ecc);
 
-		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci,
-				     p->ue_cnt, 0, 0, 0, 0, 0, -1,
-				     priv->message, "");
+		edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, p->ue_cnt,
+				     PHYS_PFN(sys), offset_in_page(sys),
+				     0, 0, 0, -1, priv->message, "");
 	}
 
 	memset(p, 0, sizeof(*p));
-- 
2.41.0


  parent reply	other threads:[~2023-09-20 19:47 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-09-20 19:26 [PATCH v4 00/18] EDAC/synopsys: Add generic DDRC info and address mapping Serge Semin
2023-09-20 19:26 ` [PATCH v4 01/18] EDAC/synopsys: Convert sysfs nodes to debugfs ones Serge Semin
2023-09-20 19:26 ` [PATCH v4 02/18] EDAC/mc: Extend memtypes with LPDDR(mDDR) and LPDDR2 Serge Semin
2023-09-20 19:26 ` [PATCH v4 03/18] EDAC/synopsys: Extend memtypes supported by controller Serge Semin
2023-09-20 19:26 ` [PATCH v4 04/18] EDAC/synopsys: Detach private data from mci instance Serge Semin
2023-09-20 19:26 ` [PATCH v4 05/18] EDAC/synopsys: Add DDRC basic parameters infrastructure Serge Semin
2023-09-26  8:07   ` kernel test robot
2023-09-26  8:56     ` Serge Semin
2023-09-20 19:26 ` [PATCH v4 06/18] EDAC/synopsys: Convert plat-data to plat-init function Serge Semin
2023-09-20 19:26 ` [PATCH v4 07/18] EDAC/synopsys: Parse ADDRMAP[7-8] CSRs for (LP)DDR4 only Serge Semin
2023-09-20 19:26 ` [PATCH v4 08/18] EDAC/synopsys: Parse ADDRMAP[0] CSR for multi-ranks case only Serge Semin
2023-09-20 19:26 ` [PATCH v4 09/18] EDAC/synopsys: Set actual DIMM ECC errors grain Serge Semin
2023-09-20 19:26 ` [PATCH v4 10/18] EDAC/synopsys: Get corrected bit position Serge Semin
2023-09-20 19:26 ` [PATCH v4 11/18] EDAC/synopsys: Pass syndrome to EDAC error handler Serge Semin
2023-09-20 19:26 ` [PATCH v4 12/18] EDAC/synopsys: Read full data+ecc pattern on errors Serge Semin
2023-09-20 19:26 ` [PATCH v4 13/18] EDAC/synopsys: Introduce System/SDRAM address translation interface Serge Semin
2023-09-27  7:13   ` kernel test robot
2023-09-20 19:26 ` [PATCH v4 14/18] EDAC/synopsys: Simplify HIF/SDRAM column mapping get procedure Serge Semin
2023-09-20 19:27 ` [PATCH v4 15/18] EDAC/synopsys: Add HIF/SDRAM mapping debugfs node Serge Semin
2023-09-20 19:46 ` Serge Semin [this message]
2023-09-20 19:50 ` [PATCH v4 17/18] EDAC/synopsys: Add system address regions support Serge Semin
2023-09-20 19:50 ` [PATCH v4 18/18] EDAC/synopsys: Add mapping-based memory size calculation Serge Semin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230920194656.30879-1-fancer.lancer@gmail.com \
    --to=fancer.lancer@gmail.com \
    --cc=alexander.stein@ew.tq-group.com \
    --cc=arnd@arndb.de \
    --cc=bp@alien8.de \
    --cc=dinguyen@kernel.org \
    --cc=gregkh@linuxfoundation.org \
    --cc=james.morse@arm.com \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mchehab@kernel.org \
    --cc=michal.simek@amd.com \
    --cc=punnaiah.choudary.kalluri@xilinx.com \
    --cc=rric@kernel.org \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox