public inbox for linux-edac@vger.kernel.org
 help / color / mirror / Atom feed
From: Kyle Meyer <kyle.meyer@hpe.com>
To: tony.luck@intel.com, bp@alien8.de, james.morse@arm.com,
	mchehab@kernel.org, rric@kernel.org, linux-edac@vger.kernel.org,
	linux-kernel@vger.kernel.org
Cc: Kyle Meyer <kyle.meyer@hpe.com>
Subject: [PATCH] EDAC/{i10nm,skx,skx_common}: Support multiple clumps
Date: Thu,  5 Dec 2024 10:59:54 -0600	[thread overview]
Message-ID: <20241205165954.7957-1-kyle.meyer@hpe.com> (raw)

The 3-bit source IDs in PCI configuration space registers are limited to
8 unique IDs, and each ID is local to a clump (UPI/QPI domain).

Source IDs cannot be used to map devices to sockets on systems with
multiple clumps because the same source IDs repeat in every clump.

On systems with multiple clumps, get package IDs instead of source IDs.
Name IMC information structures after the resulting package/source ID
rather than the node ID, which is no longer read.

Signed-off-by: Kyle Meyer <kyle.meyer@hpe.com>
---
 drivers/edac/i10nm_base.c | 21 +++++++++-------
 drivers/edac/skx_base.c   | 19 ++++++++------
 drivers/edac/skx_common.c | 52 +++++++++++++++++++++++++++++++++------
 drivers/edac/skx_common.h |  5 ++--
 4 files changed, 71 insertions(+), 26 deletions(-)

diff --git a/drivers/edac/i10nm_base.c b/drivers/edac/i10nm_base.c
index 51556c72a967..59384677d025 100644
--- a/drivers/edac/i10nm_base.c
+++ b/drivers/edac/i10nm_base.c
@@ -1010,7 +1010,7 @@ static struct notifier_block i10nm_mce_dec = {
 
 static int __init i10nm_init(void)
 {
-	u8 mc = 0, src_id = 0, node_id = 0;
+	u8 mc = 0, src_id = 0;
 	const struct x86_cpu_id *id;
 	struct res_config *cfg;
 	const char *owner;
@@ -1018,6 +1018,7 @@ static int __init i10nm_init(void)
 	int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
 	u64 tolm, tohm;
 	int imc_num;
+	int dup_src_ids = 0;
 
 	edac_dbg(2, "\n");
 
@@ -1065,24 +1066,26 @@ static int __init i10nm_init(void)
 
 	imc_num = res_cfg->ddr_imc_num + res_cfg->hbm_imc_num;
 
-	list_for_each_entry(d, i10nm_edac_list, list) {
-		rc = skx_get_src_id(d, 0xf8, &src_id);
-		if (rc < 0)
-			goto fail;
+	rc = dup_src_ids = skx_check_dup_src_ids(0xf8);
+	if (rc < 0)
+		goto fail;
 
-		rc = skx_get_node_id(d, &node_id);
+	list_for_each_entry(d, i10nm_edac_list, list) {
+		if (dup_src_ids)
+			rc = skx_get_pkg_id(d, &src_id);
+		else
+			rc = skx_get_src_id(d, 0xf8, &src_id);
 		if (rc < 0)
 			goto fail;
 
-		edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
+		edac_dbg(2, "src_id = %d\n", src_id);
 		for (i = 0; i < imc_num; i++) {
 			if (!d->imc[i].mdev)
 				continue;
 
 			d->imc[i].mc  = mc++;
 			d->imc[i].lmc = i;
-			d->imc[i].src_id  = src_id;
-			d->imc[i].node_id = node_id;
+			d->imc[i].src_id = src_id;
 			if (d->imc[i].hbm_mc) {
 				d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
 				d->imc[i].num_channels = cfg->hbm_chan_num;
diff --git a/drivers/edac/skx_base.c b/drivers/edac/skx_base.c
index 14cfd394b469..189b8c5a1bda 100644
--- a/drivers/edac/skx_base.c
+++ b/drivers/edac/skx_base.c
@@ -600,8 +600,9 @@ static int __init skx_init(void)
 	const struct munit *m;
 	const char *owner;
 	int rc = 0, i, off[3] = {0xd0, 0xd4, 0xd8};
-	u8 mc = 0, src_id, node_id;
+	u8 mc = 0, src_id;
 	struct skx_dev *d;
+	int dup_src_ids = 0;
 
 	edac_dbg(2, "\n");
 
@@ -646,19 +647,23 @@ static int __init skx_init(void)
 		}
 	}
 
+	rc = dup_src_ids = skx_check_dup_src_ids(0xf0);
+	if (rc < 0)
+		goto fail;
+
 	list_for_each_entry(d, skx_edac_list, list) {
-		rc = skx_get_src_id(d, 0xf0, &src_id);
-		if (rc < 0)
-			goto fail;
-		rc = skx_get_node_id(d, &node_id);
+		if (dup_src_ids)
+			rc = skx_get_pkg_id(d, &src_id);
+		else
+			rc = skx_get_src_id(d, 0xf0, &src_id);
 		if (rc < 0)
 			goto fail;
-		edac_dbg(2, "src_id=%d node_id=%d\n", src_id, node_id);
+
+		edac_dbg(2, "src_id = %d\n", src_id);
 		for (i = 0; i < SKX_NUM_IMC; i++) {
 			d->imc[i].mc = mc++;
 			d->imc[i].lmc = i;
 			d->imc[i].src_id = src_id;
-			d->imc[i].node_id = node_id;
 			rc = skx_register_mci(&d->imc[i], d->imc[i].chan[0].cdev,
 					      "Skylake Socket", EDAC_MOD_STR,
 					      skx_get_dimm_config, cfg);
diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c
index 6cf17af7d911..56fec7310f40 100644
--- a/drivers/edac/skx_common.c
+++ b/drivers/edac/skx_common.c
@@ -235,19 +235,55 @@ int skx_get_src_id(struct skx_dev *d, int off, u8 *id)
 }
 EXPORT_SYMBOL_GPL(skx_get_src_id);
 
-int skx_get_node_id(struct skx_dev *d, u8 *id)
+int skx_check_dup_src_ids(int off)
 {
-	u32 reg;
+	u8 id;
+	struct skx_dev *d;
+	int rc;
+	DECLARE_BITMAP(id_map, 8);
 
-	if (pci_read_config_dword(d->util_all, 0xf4, &reg)) {
-		skx_printk(KERN_ERR, "Failed to read node id\n");
-		return -ENODEV;
+	bitmap_zero(id_map, 8);
+
+	/*
+	 * The 3-bit source IDs in PCI configuration space registers are limited
+	 * to 8 unique IDs, and each ID is local to a clump (UPI/QPI domain).
+	 */
+	list_for_each_entry(d, &dev_edac_list, list) {
+		rc = skx_get_src_id(d, off, &id);
+		if (rc < 0)
+			return rc;
+
+		if (test_bit(id, id_map))
+			return 1;
+
+		set_bit(id, id_map);
 	}
 
-	*id = GET_BITFIELD(reg, 0, 2);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(skx_get_node_id);
+EXPORT_SYMBOL_GPL(skx_check_dup_src_ids);
+
+int skx_get_pkg_id(struct skx_dev *d, u8 *id)
+{
+	int node;
+	int cpu;
+
+	node = pcibus_to_node(d->util_all->bus);
+	if (numa_valid_node(node)) {
+		for_each_cpu(cpu, cpumask_of_pcibus(d->util_all->bus)) {
+			struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+			if (c->initialized && cpu_to_node(cpu) == node) {
+				*id = c->topo.pkg_id;
+				return 0;
+			}
+		}
+	}
+
+	skx_printk(KERN_ERR, "Failed to get package ID from NUMA information\n");
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(skx_get_pkg_id);
 
 static int get_width(u32 mtr)
 {
@@ -507,7 +543,7 @@ int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
 	pvt->imc = imc;
 
 	mci->ctl_name = kasprintf(GFP_KERNEL, "%s#%d IMC#%d", ctl_name,
-				  imc->node_id, imc->lmc);
+				  imc->src_id, imc->lmc);
 	if (!mci->ctl_name) {
 		rc = -ENOMEM;
 		goto fail0;
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 54bba8a62f72..0f06d45c9b3e 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -103,7 +103,7 @@ struct skx_dev {
 		bool hbm_mc;
 		u8 mc;	/* system wide mc# */
 		u8 lmc;	/* socket relative mc# */
-		u8 src_id, node_id;
+		u8 src_id;
 		struct skx_channel {
 			struct pci_dev	*cdev;
 			struct pci_dev	*edev;
@@ -244,7 +244,8 @@ void skx_set_mem_cfg(bool mem_cfg_2lm);
 void skx_set_res_cfg(struct res_config *cfg);
 
 int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
-int skx_get_node_id(struct skx_dev *d, u8 *id);
+int skx_check_dup_src_ids(int off);
+int skx_get_pkg_id(struct skx_dev *d, u8 *id);
 
 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
 
-- 
2.47.1


             reply	other threads:[~2024-12-05 17:28 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-05 16:59 Kyle Meyer [this message]
2024-12-05 19:13 ` [PATCH] EDAC/{i10nm,skx,skx_common}: Support multiple clumps Luck, Tony
2024-12-05 20:05   ` Kyle Meyer
2024-12-05 22:52     ` Luck, Tony
2024-12-05 23:57       ` Luck, Tony
2024-12-06  0:57         ` Kyle Meyer
2024-12-06  1:26           ` Zhuo, Qiuxu
2024-12-06  2:33             ` Kyle Meyer
2024-12-06 21:24               ` Luck, Tony
2024-12-06 22:09                 ` Luck, Tony
2024-12-10 16:37                   ` Bjorn Helgaas
2024-12-10 17:50                     ` Luck, Tony

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241205165954.7957-1-kyle.meyer@hpe.com \
    --to=kyle.meyer@hpe.com \
    --cc=bp@alien8.de \
    --cc=james.morse@arm.com \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mchehab@kernel.org \
    --cc=rric@kernel.org \
    --cc=tony.luck@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox