Linux EDAC development
 help / color / mirror / Atom feed
* [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array
@ 2025-07-31 14:55 Qiuxu Zhuo
  2025-07-31 14:55 ` [PATCH 1/7] EDAC/{skx_common,skx}: Use configuration data, not global macros Qiuxu Zhuo
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: Qiuxu Zhuo @ 2025-07-31 14:55 UTC (permalink / raw)
  To: Tony Luck, Borislav Petkov
  Cc: Qiuxu Zhuo, James Morse, Mauro Carvalho Chehab, Robert Richter,
	Lai Yi, linux-edac, linux-kernel

Problem
=======
The current array of memory controller instances for Intel server EDAC
driver is sized using the macro NUM_IMC. Each time EDAC support is added
for a new CPU, NUM_IMC needs to be updated to ensure it is greater than
or equal to the number of memory controllers for the new CPU. This approach
is inconvenient and also results in memory waste for older CPUs with fewer
memory controllers.

Solution
========
Make the array of memory controller instances a flexible array and
determine its size from configuration data or at runtime.

Patches
=======
Patch 1~3: Refactor code to be independent of *NUM*_IMC macros.
Patch   4: Make the array of memory controller instances a flexible array.
Patch 5~7: Clean up and remove unused *NUM*_IMC macros.

Testing
=======
Pass basic testing on Cascade Lake, {Sapphire, Granite} Rapids server CPUs.
- Load and unload the {skx,i10nm_}edac driver.
- Receive events for memory correctable errors.
- Decode memory errors.

This patch series is on top of v6.16.

Qiuxu Zhuo (7):
  EDAC/{skx_common,skx}: Use configuration data, not global macros
  EDAC/skx_common: Move mc_mapping to be a field inside struct skx_imc
  EDAC/skx_common: Swap memory controller index mapping
  EDAC/skx_common: Make skx_dev->imc[] a flexible array
  EDAC/skx_common: Remove redundant upper bound check for res->imc
  EDAC/i10nm: Reallocate skx_dev list if preconfigured cnt != runtime cnt
  EDAC/skx_common: Remove unused *NUM*_IMC macros

 drivers/edac/i10nm_base.c | 13 +++++-----
 drivers/edac/skx_base.c   | 33 +++++++++++++++----------
 drivers/edac/skx_common.c | 51 +++++++++++++++++++++++++--------------
 drivers/edac/skx_common.h | 28 +++++++++------------
 4 files changed, 72 insertions(+), 53 deletions(-)


base-commit: 038d61fd642278bab63ee8ef722c50d10ab01e8f
-- 
2.43.0


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 1/7] EDAC/{skx_common,skx}: Use configuration data, not global macros
  2025-07-31 14:55 [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Qiuxu Zhuo
@ 2025-07-31 14:55 ` Qiuxu Zhuo
  2025-07-31 14:55 ` [PATCH 2/7] EDAC/skx_common: Move mc_mapping to be a field inside struct skx_imc Qiuxu Zhuo
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Qiuxu Zhuo @ 2025-07-31 14:55 UTC (permalink / raw)
  To: Tony Luck, Borislav Petkov
  Cc: Qiuxu Zhuo, James Morse, Mauro Carvalho Chehab, Robert Richter,
	Lai Yi, linux-edac, linux-kernel

Use model-specific configuration data for the number of memory controllers
per socket, channels per memory controller, and DIMMs per channel as
intended, instead of relying on global macros for maximum values.

No functional changes intended.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
---
 drivers/edac/skx_base.c   | 33 ++++++++++++++++++++-------------
 drivers/edac/skx_common.c | 16 +++++++++-------
 drivers/edac/skx_common.h |  1 +
 3 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 29897b21fb8e..078ddf95cc6e 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -33,6 +33,15 @@ static unsigned int nvdimm_count;
 #define	MASK26	0x3FFFFFF		/* Mask for 2^26 */
 #define MASK29	0x1FFFFFFF		/* Mask for 2^29 */
 
+static struct res_config skx_cfg = {
+	.type			= SKX,
+	.decs_did		= 0x2016,
+	.busno_cfg_offset	= 0xcc,
+	.ddr_imc_num		= 2,
+	.ddr_chan_num		= 3,
+	.ddr_dimm_num		= 2,
+};
+
 static struct skx_dev *get_skx_dev(struct pci_bus *bus, u8 idx)
 {
 	struct skx_dev *d;
@@ -52,7 +61,7 @@ enum munittype {
 
 struct munit {
 	u16	did;
-	u16	devfn[SKX_NUM_IMC];
+	u16	devfn[2];
 	u8	busidx;
 	u8	per_socket;
 	enum munittype mtype;
@@ -89,11 +98,11 @@ static int get_all_munits(const struct munit *m)
 		if (!pdev)
 			break;
 		ndev++;
-		if (m->per_socket == SKX_NUM_IMC) {
-			for (i = 0; i < SKX_NUM_IMC; i++)
+		if (m->per_socket == skx_cfg.ddr_imc_num) {
+			for (i = 0; i < skx_cfg.ddr_imc_num; i++)
 				if (m->devfn[i] == pdev->devfn)
 					break;
-			if (i == SKX_NUM_IMC)
+			if (i == skx_cfg.ddr_imc_num)
 				goto fail;
 		}
 		d = get_skx_dev(pdev->bus, m->busidx);
@@ -157,12 +166,6 @@ static int get_all_munits(const struct munit *m)
 	return -ENODEV;
 }
 
-static struct res_config skx_cfg = {
-	.type			= SKX,
-	.decs_did		= 0x2016,
-	.busno_cfg_offset	= 0xcc,
-};
-
 static const struct x86_cpu_id skx_cpuids[] = {
 	X86_MATCH_VFM(INTEL_SKYLAKE_X, &skx_cfg),
 	{ }
@@ -186,11 +189,11 @@ static int skx_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg)
 	/* Only the mcmtr on the first channel is effective */
 	pci_read_config_dword(imc->chan[0].cdev, 0x87c, &mcmtr);
 
-	for (i = 0; i < SKX_NUM_CHANNELS; i++) {
+	for (i = 0; i < cfg->ddr_chan_num; i++) {
 		ndimms = 0;
 		pci_read_config_dword(imc->chan[i].cdev, 0x8C, &amap);
 		pci_read_config_dword(imc->chan[i].cdev, 0x400, &mcddrtcfg);
-		for (j = 0; j < SKX_NUM_DIMMS; j++) {
+		for (j = 0; j < cfg->ddr_dimm_num; j++) {
 			dimm = edac_get_dimm(mci, i, j, 0);
 			pci_read_config_dword(imc->chan[i].cdev,
 					      0x80 + 4 * j, &mtr);
@@ -620,6 +623,7 @@ static int __init skx_init(void)
 		return -ENODEV;
 
 	cfg = (struct res_config *)id->driver_data;
+	skx_set_res_cfg(cfg);
 
 	rc = skx_get_hi_lo(0x2034, off, &skx_tolm, &skx_tohm);
 	if (rc)
@@ -652,10 +656,13 @@ static int __init skx_init(void)
 			goto fail;
 
 		edac_dbg(2, "src_id = %d\n", src_id);
-		for (i = 0; i < SKX_NUM_IMC; i++) {
+		for (i = 0; i < cfg->ddr_imc_num; i++) {
 			d->imc[i].mc = mc++;
 			d->imc[i].lmc = i;
 			d->imc[i].src_id = src_id;
+			d->imc[i].num_channels = cfg->ddr_chan_num;
+			d->imc[i].num_dimms    = cfg->ddr_dimm_num;
+
 			rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev,
 					      "Skylake Socket", EDAC_MOD_STR,
 					      skx_get_dimm_config, cfg);
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index c9ade45c1a99..d0f53a3a8a0b 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -320,10 +320,10 @@ static int get_width(u32 mtr)
  */
 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
 {
+	int ndev = 0, imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num;
 	struct pci_dev *pdev, *prev;
 	struct skx_dev *d;
 	u32 reg;
-	int ndev = 0;
 
 	prev = NULL;
 	for (;;) {
@@ -354,8 +354,10 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
 			d->seg = GET_BITFIELD(reg, 16, 23);
 		}
 
-		edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x\n",
-			 d->bus[0], d->bus[1], d->bus[2], d->bus[3]);
+		d->num_imc = imc_num;
+
+		edac_dbg(2, "busses: 0x%x, 0x%x, 0x%x, 0x%x, imcs %d\n",
+			 d->bus[0], d->bus[1], d->bus[2], d->bus[3], imc_num);
 		list_add_tail(&d->list, &dev_edac_list);
 		prev = pdev;
 
@@ -541,10 +543,10 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
 
 	/* Allocate a new MC control structure */
 	layers[0].type = EDAC_MC_LAYER_CHANNEL;
-	layers[0].size = NUM_CHANNELS;
+	layers[0].size = imc->num_channels;
 	layers[0].is_virt_csrow = false;
 	layers[1].type = EDAC_MC_LAYER_SLOT;
-	layers[1].size = NUM_DIMMS;
+	layers[1].size = imc->num_dimms;
 	layers[1].is_virt_csrow = true;
 	mci = edac_mc_alloc(imc->mc, ARRAY_SIZE(layers), layers,
 			    sizeof(struct skx_pvt));
@@ -784,7 +786,7 @@ void skx_remove(void)
 
 	list_for_each_entry_safe(d, tmp, &dev_edac_list, list) {
 		list_del(&d->list);
-		for (i = 0; i < NUM_IMC; i++) {
+		for (i = 0; i < d->num_imc; i++) {
 			if (d->imc[i].mci)
 				skx_unregister_mci(&d->imc[i]);
 
@@ -794,7 +796,7 @@ void skx_remove(void)
 			if (d->imc[i].mbase)
 				iounmap(d->imc[i].mbase);
 
-			for (j = 0; j < NUM_CHANNELS; j++) {
+			for (j = 0; j < d->imc[i].num_channels; j++) {
 				if (d->imc[i].chan[j].cdev)
 					pci_dev_put(d->imc[i].chan[j].cdev);
 			}
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index ec4966f7ea40..3f6007a97267 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -134,6 +134,7 @@ struct skx_dev {
 	struct pci_dev *uracu; /* for i10nm CPU */
 	struct pci_dev *pcu_cr3; /* for HBM memory detection */
 	u32 mcroute;
+	int num_imc;
 	/*
 	 * Some server BIOS may hide certain memory controllers, and the
 	 * EDAC driver skips those hidden memory controllers. However, the
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/7] EDAC/skx_common: Move mc_mapping to be a field inside struct skx_imc
  2025-07-31 14:55 [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Qiuxu Zhuo
  2025-07-31 14:55 ` [PATCH 1/7] EDAC/{skx_common,skx}: Use configuration data, not global macros Qiuxu Zhuo
@ 2025-07-31 14:55 ` Qiuxu Zhuo
  2025-07-31 14:55 ` [PATCH 3/7] EDAC/skx_common: Swap memory controller index mapping Qiuxu Zhuo
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Qiuxu Zhuo @ 2025-07-31 14:55 UTC (permalink / raw)
  To: Tony Luck, Borislav Petkov
  Cc: Qiuxu Zhuo, James Morse, Mauro Carvalho Chehab, Robert Richter,
	Lai Yi, linux-edac, linux-kernel

The mc_mapping and imc fields of struct skx_dev have the same size,
NUM_IMC. Move mc_mapping to be a field inside struct skx_imc to prepare
for making the imc array of memory controller instances a flexible array.

No functional changes intended.

Suggested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
---
 drivers/edac/skx_common.c |  8 ++++----
 drivers/edac/skx_common.h | 20 ++++++++++----------
 2 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index d0f53a3a8a0b..94a66b28751a 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -131,7 +131,7 @@ static void skx_init_mc_mapping(struct skx_dev *d)
 	 * EDAC driver.
 	 */
 	for (int i = 0; i < NUM_IMC; i++)
-		d->mc_mapping[i] = i;
+		d->imc[i].mc_mapping = i;
 }
 
 void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc)
@@ -139,16 +139,16 @@ void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc)
 	edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n",
 		 pmc, lmc);
 
-	d->mc_mapping[pmc] = lmc;
+	d->imc[pmc].mc_mapping = lmc;
 }
 EXPORT_SYMBOL_GPL(skx_set_mc_mapping);
 
 static u8 skx_get_mc_mapping(struct skx_dev *d, u8 pmc)
 {
 	edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n",
-		 pmc, d->mc_mapping[pmc]);
+		 pmc, d->imc[pmc].mc_mapping);
 
-	return d->mc_mapping[pmc];
+	return d->imc[pmc].mc_mapping;
 }
 
 static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 3f6007a97267..95d61d23f89e 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -135,16 +135,6 @@ struct skx_dev {
 	struct pci_dev *pcu_cr3; /* for HBM memory detection */
 	u32 mcroute;
 	int num_imc;
-	/*
-	 * Some server BIOS may hide certain memory controllers, and the
-	 * EDAC driver skips those hidden memory controllers. However, the
-	 * ADXL still decodes memory error address using physical memory
-	 * controller indices. The mapping table is used to convert the
-	 * physical indices (reported by ADXL) to the logical indices
-	 * (used the EDAC driver) of present memory controllers during the
-	 * error handling process.
-	 */
-	u8 mc_mapping[NUM_IMC];
 	struct skx_imc {
 		struct mem_ctl_info *mci;
 		struct pci_dev *mdev; /* for i10nm CPU */
@@ -156,6 +146,16 @@ struct skx_dev {
 		u8 mc;	/* system wide mc# */
 		u8 lmc;	/* socket relative mc# */
 		u8 src_id;
+		/*
+		 * Some server BIOS may hide certain memory controllers, and the
+		 * EDAC driver skips those hidden memory controllers. However, the
+		 * ADXL still decodes memory error address using physical memory
+		 * controller indices. The mapping table is used to convert the
+		 * physical indices (reported by ADXL) to the logical indices
+		 * (used the EDAC driver) of present memory controllers during the
+		 * error handling process.
+		 */
+		u8 mc_mapping;
 		struct skx_channel {
 			struct pci_dev	*cdev;
 			struct pci_dev	*edev;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/7] EDAC/skx_common: Swap memory controller index mapping
  2025-07-31 14:55 [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Qiuxu Zhuo
  2025-07-31 14:55 ` [PATCH 1/7] EDAC/{skx_common,skx}: Use configuration data, not global macros Qiuxu Zhuo
  2025-07-31 14:55 ` [PATCH 2/7] EDAC/skx_common: Move mc_mapping to be a field inside struct skx_imc Qiuxu Zhuo
@ 2025-07-31 14:55 ` Qiuxu Zhuo
  2025-07-31 14:55 ` [PATCH 4/7] EDAC/skx_common: Make skx_dev->imc[] a flexible array Qiuxu Zhuo
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Qiuxu Zhuo @ 2025-07-31 14:55 UTC (permalink / raw)
  To: Tony Luck, Borislav Petkov
  Cc: Qiuxu Zhuo, James Morse, Mauro Carvalho Chehab, Robert Richter,
	Lai Yi, linux-edac, linux-kernel

The current mapping of memory controller indices is from physical index [1]
to logical index [2], as show below:

  skx_dev->imc[pmc].mc_mapping = lmc

Since skx_dev->imc[] is an array of present memory controller instances,
mapping memory controller indices from logical index to physical index,
as show below, is more reasonable. This is also a preparatory step for
making skx_dev->imc[] a flexible array.

  skx_dev->imc[lmc].mc_mapping = pmc

Both mappings are equivalent. No functional changes intended.

[1] Indices for memory controllers include both those present to the
    OS and those disabled by BIOS.

[2] Indices for memory controllers present to the OS.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
---
 drivers/edac/skx_common.c | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 94a66b28751a..744706334b9d 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -130,7 +130,7 @@ static void skx_init_mc_mapping(struct skx_dev *d)
 	 * the logical indices of the memory controllers enumerated by the
 	 * EDAC driver.
 	 */
-	for (int i = 0; i < NUM_IMC; i++)
+	for (int i = 0; i < d->num_imc; i++)
 		d->imc[i].mc_mapping = i;
 }
 
@@ -139,22 +139,28 @@ void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc)
 	edac_dbg(0, "Set the mapping of mc phy idx to logical idx: %02d -> %02d\n",
 		 pmc, lmc);
 
-	d->imc[pmc].mc_mapping = lmc;
+	d->imc[lmc].mc_mapping = pmc;
 }
 EXPORT_SYMBOL_GPL(skx_set_mc_mapping);
 
-static u8 skx_get_mc_mapping(struct skx_dev *d, u8 pmc)
+static int skx_get_mc_mapping(struct skx_dev *d, u8 pmc)
 {
-	edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n",
-		 pmc, d->imc[pmc].mc_mapping);
+	for (int lmc = 0; lmc < d->num_imc; lmc++) {
+		if (d->imc[lmc].mc_mapping == pmc) {
+			edac_dbg(0, "Get the mapping of mc phy idx to logical idx: %02d -> %02d\n",
+				 pmc, lmc);
 
-	return d->imc[pmc].mc_mapping;
+			return lmc;
+		}
+	}
+
+	return -1;
 }
 
 static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
 {
+	int i, lmc, len = 0;
 	struct skx_dev *d;
-	int i, len = 0;
 
 	if (res->addr >= skx_tohm || (res->addr >= skx_tolm &&
 				      res->addr < BIT_ULL(32))) {
@@ -218,7 +224,13 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
 		return false;
 	}
 
-	res->imc = skx_get_mc_mapping(d, res->imc);
+	lmc = skx_get_mc_mapping(d, res->imc);
+	if (lmc < 0) {
+		skx_printk(KERN_ERR, "No lmc for imc %d\n", res->imc);
+		return false;
+	}
+
+	res->imc = lmc;
 
 	for (i = 0; i < adxl_component_count; i++) {
 		if (adxl_values[i] == ~0x0ull)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/7] EDAC/skx_common: Make skx_dev->imc[] a flexible array
  2025-07-31 14:55 [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Qiuxu Zhuo
                   ` (2 preceding siblings ...)
  2025-07-31 14:55 ` [PATCH 3/7] EDAC/skx_common: Swap memory controller index mapping Qiuxu Zhuo
@ 2025-07-31 14:55 ` Qiuxu Zhuo
  2025-07-31 14:55 ` [PATCH 5/7] EDAC/skx_common: Remove redundant upper bound check for res->imc Qiuxu Zhuo
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Qiuxu Zhuo @ 2025-07-31 14:55 UTC (permalink / raw)
  To: Tony Luck, Borislav Petkov
  Cc: Qiuxu Zhuo, James Morse, Mauro Carvalho Chehab, Robert Richter,
	Lai Yi, linux-edac, linux-kernel

The current skx->imc[NUM_IMC] array of memory controller instances is
sized using the macro NUM_IMC. Each time EDAC support is added for a
new CPU, NUM_IMC needs to be updated to ensure it is greater than or
equal to the number of memory controllers for the new CPU. This approach
is inconvenient and results in memory waste for older CPUs with fewer
memory controllers.

To address this, make skx->imc[] a flexible array and determine its size
from configuration data or at runtime.

Suggested-by: Tony Luck <tony.luck@intel.com>
Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
---
 drivers/edac/skx_common.c | 3 ++-
 drivers/edac/skx_common.h | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 744706334b9d..dffd75144060 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -17,6 +17,7 @@
 #include <linux/acpi.h>
 #include <linux/dmi.h>
 #include <linux/adxl.h>
+#include <linux/overflow.h>
 #include <acpi/nfit.h>
 #include <asm/mce.h>
 #include <asm/uv/uv.h>
@@ -343,7 +344,7 @@ int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list)
 		if (!pdev)
 			break;
 		ndev++;
-		d = kzalloc(sizeof(*d), GFP_KERNEL);
+		d = kzalloc(struct_size(d, imc, imc_num), GFP_KERNEL);
 		if (!d) {
 			pci_dev_put(pdev);
 			return -ENOMEM;
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 95d61d23f89e..e7038fd45d06 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -172,7 +172,7 @@ struct skx_dev {
 				u8 colbits;
 			} dimms[NUM_DIMMS];
 		} chan[NUM_CHANNELS];
-	} imc[NUM_IMC];
+	} imc[];
 };
 
 struct skx_pvt {
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 5/7] EDAC/skx_common: Remove redundant upper bound check for res->imc
  2025-07-31 14:55 [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Qiuxu Zhuo
                   ` (3 preceding siblings ...)
  2025-07-31 14:55 ` [PATCH 4/7] EDAC/skx_common: Make skx_dev->imc[] a flexible array Qiuxu Zhuo
@ 2025-07-31 14:55 ` Qiuxu Zhuo
  2025-07-31 14:55 ` [PATCH 6/7] EDAC/i10nm: Reallocate skx_dev list if preconfigured cnt != runtime cnt Qiuxu Zhuo
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: Qiuxu Zhuo @ 2025-07-31 14:55 UTC (permalink / raw)
  To: Tony Luck, Borislav Petkov
  Cc: Qiuxu Zhuo, James Morse, Mauro Carvalho Chehab, Robert Richter,
	Lai Yi, linux-edac, linux-kernel

The following upper bound check for the memory controller physical index
decoded by ADXL is the only place where use the macro 'NUM_IMC' is used:

  res->imc > NUM_IMC - 1

Since this check is already covered by skx_get_mc_mapping(), meaning no
memory controller logical index exists for an invalid memory controller
physical index decoded by ADXL, remove the redundant upper bound check
so that the definition for 'NUM_IMC' can be cleaned up (in another patch).

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
---
 drivers/edac/skx_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index dffd75144060..44f5b5402e31 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -207,7 +207,7 @@ static bool skx_adxl_decode(struct decoded_addr *res, enum error_source err_src)
 		res->cs      = (int)adxl_values[component_indices[INDEX_CS]];
 	}
 
-	if (res->imc > NUM_IMC - 1 || res->imc < 0) {
+	if (res->imc < 0) {
 		skx_printk(KERN_ERR, "Bad imc %d\n", res->imc);
 		return false;
 	}
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 6/7] EDAC/i10nm: Reallocate skx_dev list if preconfigured cnt != runtime cnt
  2025-07-31 14:55 [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Qiuxu Zhuo
                   ` (4 preceding siblings ...)
  2025-07-31 14:55 ` [PATCH 5/7] EDAC/skx_common: Remove redundant upper bound check for res->imc Qiuxu Zhuo
@ 2025-07-31 14:55 ` Qiuxu Zhuo
  2025-07-31 14:55 ` [PATCH 7/7] EDAC/skx_common: Remove unused *NUM*_IMC macros Qiuxu Zhuo
  2025-08-19 23:33 ` [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Luck, Tony
  7 siblings, 0 replies; 9+ messages in thread
From: Qiuxu Zhuo @ 2025-07-31 14:55 UTC (permalink / raw)
  To: Tony Luck, Borislav Petkov
  Cc: Qiuxu Zhuo, James Morse, Mauro Carvalho Chehab, Robert Richter,
	Lai Yi, linux-edac, linux-kernel

Ideally, read the present DDR memory controller count first and then
allocate the skx_dev list using this count. However, this approach
requires adding a significant amount of code similar to
skx_get_all_bus_mappings() to obtain the PCI bus mappings for the first
socket and use these mappings along with the related PCI register offset
to read the memory controller count.

Given that the Granite Rapids CPU is the only one that can detect the
count of memory controllers at runtime (other CPUs use the count in the
configuration data), to reduce code complexity, reallocate the skx_dev
list only if the preconfigured count of DDR memory controllers differs
from the count read at runtime for Granite Rapids CPU.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
---
 drivers/edac/i10nm_base.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index a3fca2567752..d0218df66a34 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -467,17 +467,18 @@ static int i10nm_get_imc_num(struct res_config *cfg)
 			return -ENODEV;
 		}
 
-		if (imc_num > I10NM_NUM_DDR_IMC) {
-			i10nm_printk(KERN_ERR, "Need to make I10NM_NUM_DDR_IMC >= %d\n", imc_num);
-			return -EINVAL;
-		}
-
 		if (cfg->ddr_imc_num != imc_num) {
 			/*
-			 * Store the number of present DDR memory controllers.
+			 * Update the configuration data to reflect the number of
+			 * present DDR memory controllers.
 			 */
 			cfg->ddr_imc_num = imc_num;
 			edac_dbg(2, "Set DDR MC number: %d", imc_num);
+
+			/* Release and reallocate skx_dev list with the updated number. */
+			skx_remove();
+			if (skx_get_all_bus_mappings(cfg, &i10nm_edac_list) <= 0)
+				return -ENODEV;
 		}
 
 		return 0;
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 7/7] EDAC/skx_common: Remove unused *NUM*_IMC macros
  2025-07-31 14:55 [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Qiuxu Zhuo
                   ` (5 preceding siblings ...)
  2025-07-31 14:55 ` [PATCH 6/7] EDAC/i10nm: Reallocate skx_dev list if preconfigured cnt != runtime cnt Qiuxu Zhuo
@ 2025-07-31 14:55 ` Qiuxu Zhuo
  2025-08-19 23:33 ` [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Luck, Tony
  7 siblings, 0 replies; 9+ messages in thread
From: Qiuxu Zhuo @ 2025-07-31 14:55 UTC (permalink / raw)
  To: Tony Luck, Borislav Petkov
  Cc: Qiuxu Zhuo, James Morse, Mauro Carvalho Chehab, Robert Richter,
	Lai Yi, linux-edac, linux-kernel

There are no references to the *NUM*_IMC macros, so remove them.

Signed-off-by: Qiuxu Zhuo <qiuxu.zhuo@intel.com>
---
 drivers/edac/skx_common.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index e7038fd45d06..73ba89786cdf 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -29,23 +29,18 @@
 #define GET_BITFIELD(v, lo, hi) \
 	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
 
-#define SKX_NUM_IMC		2	/* Memory controllers per socket */
 #define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
 #define SKX_NUM_DIMMS		2	/* Max DIMMS per channel */
 
-#define I10NM_NUM_DDR_IMC	12
 #define I10NM_NUM_DDR_CHANNELS	2
 #define I10NM_NUM_DDR_DIMMS	2
 
-#define I10NM_NUM_HBM_IMC	16
 #define I10NM_NUM_HBM_CHANNELS	2
 #define I10NM_NUM_HBM_DIMMS	1
 
-#define I10NM_NUM_IMC		(I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
 #define I10NM_NUM_CHANNELS	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
 #define I10NM_NUM_DIMMS		MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)
 
-#define NUM_IMC		MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
 #define NUM_CHANNELS	MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
 #define NUM_DIMMS	MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array
  2025-07-31 14:55 [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Qiuxu Zhuo
                   ` (6 preceding siblings ...)
  2025-07-31 14:55 ` [PATCH 7/7] EDAC/skx_common: Remove unused *NUM*_IMC macros Qiuxu Zhuo
@ 2025-08-19 23:33 ` Luck, Tony
  7 siblings, 0 replies; 9+ messages in thread
From: Luck, Tony @ 2025-08-19 23:33 UTC (permalink / raw)
  To: Qiuxu Zhuo
  Cc: Borislav Petkov, James Morse, Mauro Carvalho Chehab,
	Robert Richter, Lai Yi, linux-edac, linux-kernel

On Thu, Jul 31, 2025 at 10:55:27PM +0800, Qiuxu Zhuo wrote:
> Problem
> =======
> The current array of memory controller instances for Intel server EDAC
> driver is sized using the macro NUM_IMC. Each time EDAC support is added
> for a new CPU, NUM_IMC needs to be updated to ensure it is greater than
> or equal to the number of memory controllers for the new CPU. This approach
> is inconvenient and also results in memory waste for older CPUs with fewer
> memory controllers.
> 
> Solution
> ========
> Make the array of memory controller instances a flexible array and
> determine its size from configuration data or at runtime.
> 
> Patches
> =======
> Patch 1~3: Refactor code to be independent of *NUM*_IMC macros.
> Patch   4: Make the array of memory controller instances a flexible array.
> Patch 5~7: Clean up and remove unused *NUM*_IMC macros.
> 
> Testing
> =======
> Pass basic testing on Cascade Lake, {Sapphire, Granite} Rapids server CPUs.
> - Load and unload the {skx,i10nm_}edac driver.
> - Receive events for memory correctable errors.
> - Decode memory errors.
> 
> This patch series is on top of v6.16.

Applied to edac-drivers branch of RAS tree.

Thanks

-Tony

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2025-08-19 23:33 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-31 14:55 [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Qiuxu Zhuo
2025-07-31 14:55 ` [PATCH 1/7] EDAC/{skx_common,skx}: Use configuration data, not global macros Qiuxu Zhuo
2025-07-31 14:55 ` [PATCH 2/7] EDAC/skx_common: Move mc_mapping to be a field inside struct skx_imc Qiuxu Zhuo
2025-07-31 14:55 ` [PATCH 3/7] EDAC/skx_common: Swap memory controller index mapping Qiuxu Zhuo
2025-07-31 14:55 ` [PATCH 4/7] EDAC/skx_common: Make skx_dev->imc[] a flexible array Qiuxu Zhuo
2025-07-31 14:55 ` [PATCH 5/7] EDAC/skx_common: Remove redundant upper bound check for res->imc Qiuxu Zhuo
2025-07-31 14:55 ` [PATCH 6/7] EDAC/i10nm: Reallocate skx_dev list if preconfigured cnt != runtime cnt Qiuxu Zhuo
2025-07-31 14:55 ` [PATCH 7/7] EDAC/skx_common: Remove unused *NUM*_IMC macros Qiuxu Zhuo
2025-08-19 23:33 ` [PATCH 0/7] EDAC/Intel: Make memory controller instances into a flexible array Luck, Tony

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox