[PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms

Linux Perf Users
 help / color / mirror / Atom feed

* [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
@ 2026-05-27 22:19 Chun-Tse Shao
  2026-05-27 22:19 ` [PATCH RESEND v7 2/2] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Sapphire Rapids and Emerald Rapids Chun-Tse Shao
  2026-05-28  0:11 ` [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms sashiko-bot
  0 siblings, 2 replies; 5+ messages in thread
From: Chun-Tse Shao @ 2026-05-27 22:19 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Namhyung Kim
  Cc: Peter Zijlstra, Ingo Molnar, Mark Rutland, Alexander Shishkin,
	Jiri Olsa, Ian Rogers, Adrian Hunter, James Clark, Zide Chen,
	linux-perf-users, linux-kernel, Chun-Tse Shao

Prepare for supporting more Intel platforms with sub-NUMA clustering (SNC)
by generalizing the Granite Rapids (GNR)-specific logic.

Reviewed-by: Zide Chen <zide.chen@intel.com>
Signed-off-by: Chun-Tse Shao <ctshao@google.com>
Assisted-by: Gemini:gemini-3.1-pro-preview
---
v7:
  Fixed based on Sashiko review:
  - Refactor PMU initialization to be fully thread-safe using
    pthread_once and pthread_mutex locks.
  - Fix a critical bounds check bug on single-socket systems.
  - Fix potential divide-by-zero crash in uncore_cha_snc.
  - Avoid spurious warnings on non-SNC systems.
  - Avoid silent and inconsistent uncore mappings on unsupported SNC
    configurations.
  - Resolve checkpatch linter warnings.

v6: lore.kernel.org/20260515172710.428474-1-ctshao@google.com
  Make string literal.
  Add SPR into SNC2.

v5: lore.kernel.org/20260407203918.3178481-1-ctshao@google.com
  Split patch.

v4: lore.kernel.org/20260402205300.1953706-1-ctshao@google.com
  Rebase.

v3: lore.kernel.org/20260212223942.3832857-1-ctshao@google.com
  Fix a typo.

v2: lore.kernel.org/20260205232220.1980168-1-ctshao@google.com
  Split EMR and GNR in the SNC2 IMC cpu map.

v1: lore.kernel.org/20260108184430.1210223-1-ctshao@google.com

 tools/perf/arch/x86/util/pmu.c | 199 +++++++++++++++++++++------------
 1 file changed, 130 insertions(+), 69 deletions(-)

diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 7c9d238922a6..9b00d5720fb7 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
 #include <string.h>
 #include <stdio.h>
 #include <sys/types.h>
@@ -22,20 +23,31 @@
 #include "util/env.h"
 #include "util/header.h"

-static bool x86__is_intel_graniterapids(void)
-{
-	static bool checked_if_graniterapids;
-	static bool is_graniterapids;
+#define GENUINE_INTEL_GNR "GenuineIntel-6-A[DE]"

-	if (!checked_if_graniterapids) {
-		const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
-		char *cpuid = get_cpuid_str((struct perf_cpu){0});
+static bool cached_snc_supported;
+static pthread_once_t snc_support_once = PTHREAD_ONCE_INIT;

-		is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
-		free(cpuid);
-		checked_if_graniterapids = true;
+static void init_snc_support(void)
+{
+	/* Graniterapids supports SNC configuration. */
+	static const char *const supported_cpuids[] = {
+		GENUINE_INTEL_GNR, /* Graniterapids */
+	};
+	char *cpuid = get_cpuid_str((struct perf_cpu){0});
+
+	for (size_t i = 0; i < ARRAY_SIZE(supported_cpuids); i++) {
+		cached_snc_supported = cpuid && strcmp_cpuid_str(supported_cpuids[i], cpuid) == 0;
+		if (cached_snc_supported)
+			break;
 	}
-	return is_graniterapids;
+	free(cpuid);
+}
+
+static bool x86__is_snc_supported(void)
+{
+	pthread_once(&snc_support_once, init_snc_support);
+	return cached_snc_supported;
 }

 static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
@@ -52,49 +64,58 @@ static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
 	return cpus;
 }

-static int snc_nodes_per_l3_cache(void)
+static int cached_snc_nodes;
+static pthread_once_t snc_nodes_once = PTHREAD_ONCE_INIT;
+
+static void init_snc_nodes(void)
 {
-	static bool checked_snc;
-	static int snc_nodes;
-
-	if (!checked_snc) {
-		struct perf_cpu_map *node_cpus =
-			read_sysfs_cpu_map("devices/system/node/node0/cpulist");
-		struct perf_cpu_map *cache_cpus =
-			read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
-
-		snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
-		perf_cpu_map__put(cache_cpus);
-		perf_cpu_map__put(node_cpus);
-		checked_snc = true;
-	}
-	return snc_nodes;
+	struct perf_cpu_map *node_cpus =
+		read_sysfs_cpu_map("devices/system/node/node0/cpulist");
+	struct perf_cpu_map *cache_cpus =
+		read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
+
+	if (node_cpus && cache_cpus)
+		cached_snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
+	else
+		cached_snc_nodes = 0;
+	perf_cpu_map__put(cache_cpus);
+	perf_cpu_map__put(node_cpus);
 }

-static int num_chas(void)
+static int snc_nodes_per_l3_cache(void)
 {
-	static bool checked_chas;
-	static int num_chas;
+	pthread_once(&snc_nodes_once, init_snc_nodes);
+	return cached_snc_nodes;
+}

-	if (!checked_chas) {
-		int fd = perf_pmu__event_source_devices_fd();
-		struct io_dir dir;
-		struct io_dirent64 *dent;
+static int cached_num_chas;
+static pthread_once_t num_chas_once = PTHREAD_ONCE_INIT;

-		if (fd < 0)
-			return -1;
+static void init_num_chas(void)
+{
+	int fd = perf_pmu__event_source_devices_fd();
+	struct io_dir dir;
+	struct io_dirent64 *dent;

-		io_dir__init(&dir, fd);
+	if (fd < 0) {
+		cached_num_chas = -1;
+		return;
+	}

-		while ((dent = io_dir__readdir(&dir)) != NULL) {
-			/* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
-			if (strstarts(dent->d_name, "uncore_cha_"))
-				num_chas++;
-		}
-		close(fd);
-		checked_chas = true;
+	io_dir__init(&dir, fd);
+
+	while ((dent = io_dir__readdir(&dir)) != NULL) {
+		/* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
+		if (strstarts(dent->d_name, "uncore_cha_"))
+			cached_num_chas++;
 	}
-	return num_chas;
+	close(fd);
+}
+
+static int num_chas(void)
+{
+	pthread_once(&num_chas_once, init_num_chas);
+	return cached_num_chas;
 }

 #define MAX_SNCS 6
@@ -121,46 +142,73 @@ static int uncore_cha_snc(struct perf_pmu *pmu)
 		return 0;
 	}
 	chas_per_node = num_cha / snc_nodes;
+	if (chas_per_node == 0) {
+		pr_warning("Unexpected: chas_per_node is 0 (num_cha=%d, snc_nodes=%d)\n",
+			   num_cha, snc_nodes);
+		return 0;
+	}
 	cha_snc = cha_num / chas_per_node;

 	/* Range check cha_snc. for unexpected out of bounds. */
 	return cha_snc >= MAX_SNCS ? 0 : cha_snc;
 }

-static int uncore_imc_snc(struct perf_pmu *pmu)
+static const u8 *cached_imc_snc_map;
+static size_t cached_imc_snc_map_len;
+static pthread_once_t imc_snc_map_once = PTHREAD_ONCE_INIT;
+
+static void init_snc_map(void)
 {
-	// Compute the IMC SNC using lookup tables.
-	unsigned int imc_num;
 	int snc_nodes = snc_nodes_per_l3_cache();
-	const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
-	const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
-	const u8 *snc_map;
-	size_t snc_map_len;
+	char *cpuid;
+	static const u8 gnr_snc2_map[] = { 1, 1, 0, 0 };
+	static const u8 snc3_map[] = { 1, 1, 0, 0, 2, 2 };

 	switch (snc_nodes) {
 	case 2:
-		snc_map = snc2_map;
-		snc_map_len = ARRAY_SIZE(snc2_map);
+		cpuid = get_cpuid_str((struct perf_cpu){ 0 });
+		if (cpuid) {
+			if (strcmp_cpuid_str(GENUINE_INTEL_GNR, cpuid) == 0) {
+				cached_imc_snc_map = gnr_snc2_map;
+				cached_imc_snc_map_len = ARRAY_SIZE(gnr_snc2_map);
+			}
+			free(cpuid);
+		}
 		break;
 	case 3:
-		snc_map = snc3_map;
-		snc_map_len = ARRAY_SIZE(snc3_map);
+		cached_imc_snc_map = snc3_map;
+		cached_imc_snc_map_len = ARRAY_SIZE(snc3_map);
 		break;
 	default:
 		/* Error or no lookup support for SNC with >3 nodes. */
-		return 0;
+		break;
 	}

+	if (!cached_imc_snc_map)
+		pr_warning("Unexpected: can not find snc map config\n");
+}
+
+static int uncore_imc_snc(struct perf_pmu *pmu)
+{
+	// Compute the IMC SNC using lookup tables.
+	unsigned int imc_num;
+	int snc_nodes = snc_nodes_per_l3_cache();
+
+	if (snc_nodes <= 1)
+		return 0;
+
+	pthread_once(&imc_snc_map_once, init_snc_map);
+
 	/* Compute SNC for PMU. */
 	if (sscanf(pmu->name, "uncore_imc_%u", &imc_num) != 1) {
 		pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
 		return 0;
 	}
-	if (imc_num >= snc_map_len) {
-		pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
+
+	if (!cached_imc_snc_map)
 		return 0;
-	}
-	return snc_map[imc_num];
+
+	return cached_imc_snc_map[imc_num % cached_imc_snc_map_len];
 }

 static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
@@ -200,7 +248,9 @@ static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
 	return cpu_adjust[pmu_snc];
 }

-static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
+static pthread_mutex_t pmu_adjust_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
 {
 	// With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
 	// topology. For example, a two socket graniterapids machine may be set
@@ -231,9 +281,11 @@ static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool
 		return;
 	}

+	pthread_mutex_lock(&pmu_adjust_mutex);
+
 	pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu);
 	if (pmu_snc == 0) {
-		// No adjustment necessary for the first SNC.
+		pthread_mutex_unlock(&pmu_adjust_mutex);
 		return;
 	}

@@ -242,8 +294,10 @@ static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool
 		// Hold onto the perf_cpu_map globally to avoid recomputation.
 		cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc);
 		adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus));
-		if (!adjusted[pmu_snc])
+		if (!adjusted[pmu_snc]) {
+			pthread_mutex_unlock(&pmu_adjust_mutex);
 			return;
+		}
 	}

 	perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) {
@@ -263,6 +317,8 @@ static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool

 	perf_cpu_map__put(pmu->cpus);
 	pmu->cpus = perf_cpu_map__get(adjusted[pmu_snc]);
+
+	pthread_mutex_unlock(&pmu_adjust_mutex);
 }

 void perf_pmu__arch_init(struct perf_pmu *pmu)
@@ -300,11 +356,16 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
 				pmu->mem_events = perf_mem_events_intel_aux;
 			else
 				pmu->mem_events = perf_mem_events_intel;
-		} else if (x86__is_intel_graniterapids()) {
-			if (strstarts(pmu->name, "uncore_cha_"))
-				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
-			else if (strstarts(pmu->name, "uncore_imc_"))
-				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+		} else if (x86__is_snc_supported()) {
+			int snc_nodes = snc_nodes_per_l3_cache();
+
+			if (snc_nodes == 2 || snc_nodes == 3) {
+				if (strstarts(pmu->name, "uncore_cha_"))
+					uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
+				else if (strstarts(pmu->name, "uncore_imc_") &&
+					 !strstarts(pmu->name, "uncore_imc_free_running"))
+					uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+			}
 		}
 	}
 }
--
2.54.0.823.g6e5bcc1fc9-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH RESEND v7 2/2] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Sapphire Rapids and Emerald Rapids
  2026-05-27 22:19 [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms Chun-Tse Shao
@ 2026-05-27 22:19 ` Chun-Tse Shao
  2026-05-28  0:11 ` [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms sashiko-bot
  1 sibling, 0 replies; 5+ messages in thread
From: Chun-Tse Shao @ 2026-05-27 22:19 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Namhyung Kim
  Cc: Peter Zijlstra, Ingo Molnar, Mark Rutland, Alexander Shishkin,
	Jiri Olsa, Ian Rogers, Adrian Hunter, James Clark, Zide Chen,
	linux-perf-users, linux-kernel, Chun-Tse Shao

Like Granite Rapids (GNR) [1], Sapphire Rapids (SPR) and Emerald Rapids
(EMR) support sub-NUMA clustering (SNC). Adjust their uncore PMU cpumasks
using the same logic as for GNR in [1].

Tested on Emerald Rapids with SNC2 enabled:
  $ perf stat --per-node -e 'UNC_CHA_CLOCKTICKS,UNC_M_CLOCKTICKS' -a -- sleep 1

   Performance counter stats for 'system wide':

  N0       30        72125876670      UNC_CHA_CLOCKTICKS
  N0        4         8815163648      UNC_M_CLOCKTICKS
  N1       30        72124958844      UNC_CHA_CLOCKTICKS
  N1        4         8815014974      UNC_M_CLOCKTICKS
  N2       30        72121049022      UNC_CHA_CLOCKTICKS
  N2        4         8814592626      UNC_M_CLOCKTICKS
  N3       30        72117133854      UNC_CHA_CLOCKTICKS
  N3        4         8814012840      UNC_M_CLOCKTICKS

         1.001574118 seconds time elapsed

[1] lore.kernel.org/20250515181417.491401-1-irogers@google.com

Reviewed-by: Zide Chen <zide.chen@intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Chun-Tse Shao <ctshao@google.com>
Assisted-by: Gemini:gemini-3.1-pro-preview
---
 tools/perf/arch/x86/util/pmu.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 9b00d5720fb7..2c24ef3140da 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -23,6 +23,8 @@
 #include "util/env.h"
 #include "util/header.h"

+#define GENUINE_INTEL_SPR "GenuineIntel-6-8F"
+#define GENUINE_INTEL_EMR "GenuineIntel-6-CF"
 #define GENUINE_INTEL_GNR "GenuineIntel-6-A[DE]"

 static bool cached_snc_supported;
@@ -30,8 +32,10 @@ static pthread_once_t snc_support_once = PTHREAD_ONCE_INIT;

 static void init_snc_support(void)
 {
-	/* Graniterapids supports SNC configuration. */
+	/* Sapphirerapids Emeraldrapids Graniterapids support SNC configuration. */
 	static const char *const supported_cpuids[] = {
+		GENUINE_INTEL_SPR, /* Sapphirerapids */
+		GENUINE_INTEL_EMR, /* Emeraldrapids */
 		GENUINE_INTEL_GNR, /* Graniterapids */
 	};
 	char *cpuid = get_cpuid_str((struct perf_cpu){0});
@@ -161,6 +165,7 @@ static void init_snc_map(void)
 {
 	int snc_nodes = snc_nodes_per_l3_cache();
 	char *cpuid;
+	static const u8 spr_emr_snc2_map[] = { 0, 0, 1, 1 };
 	static const u8 gnr_snc2_map[] = { 1, 1, 0, 0 };
 	static const u8 snc3_map[] = { 1, 1, 0, 0, 2, 2 };

@@ -168,7 +173,11 @@ static void init_snc_map(void)
 	case 2:
 		cpuid = get_cpuid_str((struct perf_cpu){ 0 });
 		if (cpuid) {
-			if (strcmp_cpuid_str(GENUINE_INTEL_GNR, cpuid) == 0) {
+			if (strcmp_cpuid_str(GENUINE_INTEL_SPR, cpuid) == 0 ||
+			    strcmp_cpuid_str(GENUINE_INTEL_EMR, cpuid) == 0) {
+				cached_imc_snc_map = spr_emr_snc2_map;
+				cached_imc_snc_map_len = ARRAY_SIZE(spr_emr_snc2_map);
+			} else if (strcmp_cpuid_str(GENUINE_INTEL_GNR, cpuid) == 0) {
 				cached_imc_snc_map = gnr_snc2_map;
 				cached_imc_snc_map_len = ARRAY_SIZE(gnr_snc2_map);
 			}
--
2.54.0.823.g6e5bcc1fc9-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
  2026-05-27 22:19 [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms Chun-Tse Shao
  2026-05-27 22:19 ` [PATCH RESEND v7 2/2] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Sapphire Rapids and Emerald Rapids Chun-Tse Shao
@ 2026-05-28  0:11 ` sashiko-bot
  2026-05-28 21:31   ` Chun-Tse Shao
  1 sibling, 1 reply; 5+ messages in thread
From: sashiko-bot @ 2026-05-28  0:11 UTC (permalink / raw)
  To: Chun-Tse Shao; +Cc: linux-perf-users

Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:

Pre-existing issues:
- [Critical] An integer overflow in `uncore_cha_snc()` can result in a negative Sub-NUMA Clustering (SNC) node index. This bypasses the upper-bound check against `MAX_SNCS` and causes subsequent out-of-bounds array accesses.
--

commit be394e8b7ddbe730c3fc40fb608337fe19c7b33b
Author: Chun-Tse Shao <ctshao@google.com>

perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms

This commit generalizes the Sub-NUMA Clustering (SNC) logic previously
specific to Granite Rapids to support multiple Intel platforms. It uses
CPUID to determine SNC support and caches various PMU configurations
to improve thread safety.

> diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> index 7c9d238922a6c..9b00d5720fb7a 100644
> --- a/tools/perf/arch/x86/util/pmu.c
> +++ b/tools/perf/arch/x86/util/pmu.c

[ ... ]

> @@ -121,46 +142,73 @@ static int uncore_cha_snc(struct perf_pmu *pmu)
>  		return 0;
>  	}
>  	chas_per_node = num_cha / snc_nodes;
> +	if (chas_per_node == 0) {
> +		pr_warning("Unexpected: chas_per_node is 0 (num_cha=%d, snc_nodes=%d)\n",
> +			   num_cha, snc_nodes);
> +		return 0;
> +	}
>  	cha_snc = cha_num / chas_per_node;
>  
>  	/* Range check cha_snc. for unexpected out of bounds. */
>  	return cha_snc >= MAX_SNCS ? 0 : cha_snc;

[Severity: Critical]
This is a pre-existing issue, but does this range check safely handle very
large values for cha_num?

Since cha_num is parsed as an unsigned int via sscanf() from the PMU name,
and cha_snc is a signed int, a sufficiently large cha_num could cause the
division result to overflow into a negative value.

If cha_snc becomes negative, the cha_snc >= MAX_SNCS check evaluates to
false, allowing the function to return a negative index.

Could this negative index then cause out-of-bounds array accesses when used
later in uncore_cha_imc_compute_cpu_adjust()?

uncore_cha_imc_compute_cpu_adjust() {
    static bool checked_cpu_adjust[MAX_SNCS];
    ...
    if (checked_cpu_adjust[pmu_snc])
    ...
}

-- 
Sashiko AI review · https://sashiko.dev/#/patchset/20260527221934.3830896-1-ctshao@google.com?part=1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
  2026-05-28  0:11 ` [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms sashiko-bot
@ 2026-05-28 21:31   ` Chun-Tse Shao
  2026-06-09 18:44     ` Chun-Tse Shao
  0 siblings, 1 reply; 5+ messages in thread
From: Chun-Tse Shao @ 2026-05-28 21:31 UTC (permalink / raw)
  To: sashiko-reviews; +Cc: linux-perf-users

On Wed, May 27, 2026 at 5:11 PM <sashiko-bot@kernel.org> wrote:
>
> Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
>
> Pre-existing issues:
> - [Critical] An integer overflow in `uncore_cha_snc()` can result in a negative Sub-NUMA Clustering (SNC) node index. This bypasses the upper-bound check against `MAX_SNCS` and causes subsequent out-of-bounds array accesses.
> --
>
> commit be394e8b7ddbe730c3fc40fb608337fe19c7b33b
> Author: Chun-Tse Shao <ctshao@google.com>
>
> perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
>
> This commit generalizes the Sub-NUMA Clustering (SNC) logic previously
> specific to Granite Rapids to support multiple Intel platforms. It uses
> CPUID to determine SNC support and caches various PMU configurations
> to improve thread safety.
>
> > diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> > index 7c9d238922a6c..9b00d5720fb7a 100644
> > --- a/tools/perf/arch/x86/util/pmu.c
> > +++ b/tools/perf/arch/x86/util/pmu.c
>
> [ ... ]
>
> > @@ -121,46 +142,73 @@ static int uncore_cha_snc(struct perf_pmu *pmu)
> >               return 0;
> >       }
> >       chas_per_node = num_cha / snc_nodes;
> > +     if (chas_per_node == 0) {
> > +             pr_warning("Unexpected: chas_per_node is 0 (num_cha=%d, snc_nodes=%d)\n",
> > +                        num_cha, snc_nodes);
> > +             return 0;
> > +     }
> >       cha_snc = cha_num / chas_per_node;
> >
> >       /* Range check cha_snc. for unexpected out of bounds. */
> >       return cha_snc >= MAX_SNCS ? 0 : cha_snc;
>
> [Severity: Critical]
> This is a pre-existing issue, but does this range check safely handle very
> large values for cha_num?
>
> Since cha_num is parsed as an unsigned int via sscanf() from the PMU name,
> and cha_snc is a signed int, a sufficiently large cha_num could cause the
> division result to overflow into a negative value.

I think it is very unlikely we would see cha_num > INT_MAX.
Since that is a pre-existing issue, I prefer to fix it separately.

Thanks,
CT

>
> If cha_snc becomes negative, the cha_snc >= MAX_SNCS check evaluates to
> false, allowing the function to return a negative index.
>
> Could this negative index then cause out-of-bounds array accesses when used
> later in uncore_cha_imc_compute_cpu_adjust()?
>
> uncore_cha_imc_compute_cpu_adjust() {
>     static bool checked_cpu_adjust[MAX_SNCS];
>     ...
>     if (checked_cpu_adjust[pmu_snc])
>     ...
> }
>
> --
> Sashiko AI review · https://sashiko.dev/#/patchset/20260527221934.3830896-1-ctshao@google.com?part=1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
  2026-05-28 21:31   ` Chun-Tse Shao
@ 2026-06-09 18:44     ` Chun-Tse Shao
  0 siblings, 0 replies; 5+ messages in thread
From: Chun-Tse Shao @ 2026-06-09 18:44 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Ian Rogers, Namhyung Kim
  Cc: linux-perf-users, Peter Zijlstra, Ingo Molnar, Mark Rutland,
	Alexander Shishkin, Jiri Olsa, Adrian Hunter, James Clark,
	Zide Chen, Linux Kernel Mailing List, Chun-Tse Shao

Ping for attention.

Thanks,
CT

On Thu, May 28, 2026 at 2:31 PM Chun-Tse Shao <ctshao@google.com> wrote:
>
> On Wed, May 27, 2026 at 5:11 PM <sashiko-bot@kernel.org> wrote:
> >
> > Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
> >
> > Pre-existing issues:
> > - [Critical] An integer overflow in `uncore_cha_snc()` can result in a negative Sub-NUMA Clustering (SNC) node index. This bypasses the upper-bound check against `MAX_SNCS` and causes subsequent out-of-bounds array accesses.
> > --
> >
> > commit be394e8b7ddbe730c3fc40fb608337fe19c7b33b
> > Author: Chun-Tse Shao <ctshao@google.com>
> >
> > perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
> >
> > This commit generalizes the Sub-NUMA Clustering (SNC) logic previously
> > specific to Granite Rapids to support multiple Intel platforms. It uses
> > CPUID to determine SNC support and caches various PMU configurations
> > to improve thread safety.
> >
> > > diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> > > index 7c9d238922a6c..9b00d5720fb7a 100644
> > > --- a/tools/perf/arch/x86/util/pmu.c
> > > +++ b/tools/perf/arch/x86/util/pmu.c
> >
> > [ ... ]
> >
> > > @@ -121,46 +142,73 @@ static int uncore_cha_snc(struct perf_pmu *pmu)
> > >               return 0;
> > >       }
> > >       chas_per_node = num_cha / snc_nodes;
> > > +     if (chas_per_node == 0) {
> > > +             pr_warning("Unexpected: chas_per_node is 0 (num_cha=%d, snc_nodes=%d)\n",
> > > +                        num_cha, snc_nodes);
> > > +             return 0;
> > > +     }
> > >       cha_snc = cha_num / chas_per_node;
> > >
> > >       /* Range check cha_snc. for unexpected out of bounds. */
> > >       return cha_snc >= MAX_SNCS ? 0 : cha_snc;
> >
> > [Severity: Critical]
> > This is a pre-existing issue, but does this range check safely handle very
> > large values for cha_num?
> >
> > Since cha_num is parsed as an unsigned int via sscanf() from the PMU name,
> > and cha_snc is a signed int, a sufficiently large cha_num could cause the
> > division result to overflow into a negative value.
>
> I think it is very unlikely we would see cha_num > INT_MAX.
> Since that is a pre-existing issue, I prefer to fix it separately.
>
> Thanks,
> CT
>
> >
> > If cha_snc becomes negative, the cha_snc >= MAX_SNCS check evaluates to
> > false, allowing the function to return a negative index.
> >
> > Could this negative index then cause out-of-bounds array accesses when used
> > later in uncore_cha_imc_compute_cpu_adjust()?
> >
> > uncore_cha_imc_compute_cpu_adjust() {
> >     static bool checked_cpu_adjust[MAX_SNCS];
> >     ...
> >     if (checked_cpu_adjust[pmu_snc])
> >     ...
> > }
> >
> > --
> > Sashiko AI review · https://sashiko.dev/#/patchset/20260527221934.3830896-1-ctshao@google.com?part=1

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-06-09 18:44 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-27 22:19 [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms Chun-Tse Shao
2026-05-27 22:19 ` [PATCH RESEND v7 2/2] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Sapphire Rapids and Emerald Rapids Chun-Tse Shao
2026-05-28  0:11 ` [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms sashiko-bot
2026-05-28 21:31   ` Chun-Tse Shao
2026-06-09 18:44     ` Chun-Tse Shao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox