* [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
@ 2026-05-27 22:19 Chun-Tse Shao
2026-05-27 22:19 ` [PATCH RESEND v7 2/2] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Sapphire Rapids and Emerald Rapids Chun-Tse Shao
2026-05-28 0:11 ` [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms sashiko-bot
0 siblings, 2 replies; 5+ messages in thread
From: Chun-Tse Shao @ 2026-05-27 22:19 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim
Cc: Peter Zijlstra, Ingo Molnar, Mark Rutland, Alexander Shishkin,
Jiri Olsa, Ian Rogers, Adrian Hunter, James Clark, Zide Chen,
linux-perf-users, linux-kernel, Chun-Tse Shao
Prepare for supporting more Intel platforms with sub-NUMA clustering (SNC)
by generalizing the Granite Rapids (GNR) specific logic.
Reviewed-by: Zide Chen <zide.chen@intel.com>
Signed-off-by: Chun-Tse Shao <ctshao@google.com>
Assisted-by: Gemini:gemini-3.1-pro-preview
---
v7:
Fixed based on Sashiko review:
- Refactor PMU initialization to be fully thread-safe using
pthread_once and pthread_mutex locks.
- Fix a critical bounds check bug on single-socket systems.
- Fix potential divide-by-zero crash in uncore_cha_snc.
- Avoid spurious warnings on non-SNC systems.
- Avoid silent and inconsistent uncore mappings on unsupported SNC
configurations.
- Resolve checkpatch linter warnings.
v6: lore.kernel.org/20260515172710.428474-1-ctshao@google.com
Make string literal.
Add SPR into SNC2.
v5: lore.kernel.org/20260407203918.3178481-1-ctshao@google.com
Split patch.
v4: lore.kernel.org/20260402205300.1953706-1-ctshao@google.com
Rebase.
v3: lore.kernel.org/20260212223942.3832857-1-ctshao@google.com
Fix a typo.
v2: lore.kernel.org/20260205232220.1980168-1-ctshao@google.com
Split EMR and GNR in the SNC2 IMC cpu map.
v1: lore.kernel.org/20260108184430.1210223-1-ctshao@google.com
tools/perf/arch/x86/util/pmu.c | 199 +++++++++++++++++++++------------
1 file changed, 130 insertions(+), 69 deletions(-)
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 7c9d238922a6..9b00d5720fb7 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
#include <string.h>
#include <stdio.h>
#include <sys/types.h>
@@ -22,20 +23,31 @@
#include "util/env.h"
#include "util/header.h"
-static bool x86__is_intel_graniterapids(void)
-{
- static bool checked_if_graniterapids;
- static bool is_graniterapids;
+#define GENUINE_INTEL_GNR "GenuineIntel-6-A[DE]"
- if (!checked_if_graniterapids) {
- const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
- char *cpuid = get_cpuid_str((struct perf_cpu){0});
+static bool cached_snc_supported;
+static pthread_once_t snc_support_once = PTHREAD_ONCE_INIT;
- is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
- free(cpuid);
- checked_if_graniterapids = true;
+static void init_snc_support(void)
+{
+ /* Graniterapids supports SNC configuration. */
+ static const char *const supported_cpuids[] = {
+ GENUINE_INTEL_GNR, /* Graniterapids */
+ };
+ char *cpuid = get_cpuid_str((struct perf_cpu){0});
+
+ for (size_t i = 0; i < ARRAY_SIZE(supported_cpuids); i++) {
+ cached_snc_supported = cpuid && strcmp_cpuid_str(supported_cpuids[i], cpuid) == 0;
+ if (cached_snc_supported)
+ break;
}
- return is_graniterapids;
+ free(cpuid);
+}
+
+static bool x86__is_snc_supported(void)
+{
+ pthread_once(&snc_support_once, init_snc_support);
+ return cached_snc_supported;
}
static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
@@ -52,49 +64,58 @@ static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
return cpus;
}
-static int snc_nodes_per_l3_cache(void)
+static int cached_snc_nodes;
+static pthread_once_t snc_nodes_once = PTHREAD_ONCE_INIT;
+
+static void init_snc_nodes(void)
{
- static bool checked_snc;
- static int snc_nodes;
-
- if (!checked_snc) {
- struct perf_cpu_map *node_cpus =
- read_sysfs_cpu_map("devices/system/node/node0/cpulist");
- struct perf_cpu_map *cache_cpus =
- read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
-
- snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
- perf_cpu_map__put(cache_cpus);
- perf_cpu_map__put(node_cpus);
- checked_snc = true;
- }
- return snc_nodes;
+ struct perf_cpu_map *node_cpus =
+ read_sysfs_cpu_map("devices/system/node/node0/cpulist");
+ struct perf_cpu_map *cache_cpus =
+ read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
+
+ if (node_cpus && cache_cpus)
+ cached_snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
+ else
+ cached_snc_nodes = 0;
+ perf_cpu_map__put(cache_cpus);
+ perf_cpu_map__put(node_cpus);
}
-static int num_chas(void)
+static int snc_nodes_per_l3_cache(void)
{
- static bool checked_chas;
- static int num_chas;
+ pthread_once(&snc_nodes_once, init_snc_nodes);
+ return cached_snc_nodes;
+}
- if (!checked_chas) {
- int fd = perf_pmu__event_source_devices_fd();
- struct io_dir dir;
- struct io_dirent64 *dent;
+static int cached_num_chas;
+static pthread_once_t num_chas_once = PTHREAD_ONCE_INIT;
- if (fd < 0)
- return -1;
+static void init_num_chas(void)
+{
+ int fd = perf_pmu__event_source_devices_fd();
+ struct io_dir dir;
+ struct io_dirent64 *dent;
- io_dir__init(&dir, fd);
+ if (fd < 0) {
+ cached_num_chas = -1;
+ return;
+ }
- while ((dent = io_dir__readdir(&dir)) != NULL) {
- /* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
- if (strstarts(dent->d_name, "uncore_cha_"))
- num_chas++;
- }
- close(fd);
- checked_chas = true;
+ io_dir__init(&dir, fd);
+
+ while ((dent = io_dir__readdir(&dir)) != NULL) {
+ /* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
+ if (strstarts(dent->d_name, "uncore_cha_"))
+ cached_num_chas++;
}
- return num_chas;
+ close(fd);
+}
+
+static int num_chas(void)
+{
+ pthread_once(&num_chas_once, init_num_chas);
+ return cached_num_chas;
}
#define MAX_SNCS 6
@@ -121,46 +142,73 @@ static int uncore_cha_snc(struct perf_pmu *pmu)
return 0;
}
chas_per_node = num_cha / snc_nodes;
+ if (chas_per_node == 0) {
+ pr_warning("Unexpected: chas_per_node is 0 (num_cha=%d, snc_nodes=%d)\n",
+ num_cha, snc_nodes);
+ return 0;
+ }
cha_snc = cha_num / chas_per_node;
/* Range check cha_snc. for unexpected out of bounds. */
return cha_snc >= MAX_SNCS ? 0 : cha_snc;
}
-static int uncore_imc_snc(struct perf_pmu *pmu)
+static const u8 *cached_imc_snc_map;
+static size_t cached_imc_snc_map_len;
+static pthread_once_t imc_snc_map_once = PTHREAD_ONCE_INIT;
+
+static void init_snc_map(void)
{
- // Compute the IMC SNC using lookup tables.
- unsigned int imc_num;
int snc_nodes = snc_nodes_per_l3_cache();
- const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
- const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
- const u8 *snc_map;
- size_t snc_map_len;
+ char *cpuid;
+ static const u8 gnr_snc2_map[] = { 1, 1, 0, 0 };
+ static const u8 snc3_map[] = { 1, 1, 0, 0, 2, 2 };
switch (snc_nodes) {
case 2:
- snc_map = snc2_map;
- snc_map_len = ARRAY_SIZE(snc2_map);
+ cpuid = get_cpuid_str((struct perf_cpu){ 0 });
+ if (cpuid) {
+ if (strcmp_cpuid_str(GENUINE_INTEL_GNR, cpuid) == 0) {
+ cached_imc_snc_map = gnr_snc2_map;
+ cached_imc_snc_map_len = ARRAY_SIZE(gnr_snc2_map);
+ }
+ free(cpuid);
+ }
break;
case 3:
- snc_map = snc3_map;
- snc_map_len = ARRAY_SIZE(snc3_map);
+ cached_imc_snc_map = snc3_map;
+ cached_imc_snc_map_len = ARRAY_SIZE(snc3_map);
break;
default:
/* Error or no lookup support for SNC with >3 nodes. */
- return 0;
+ break;
}
+ if (!cached_imc_snc_map)
+ pr_warning("Unexpected: can not find snc map config\n");
+}
+
+static int uncore_imc_snc(struct perf_pmu *pmu)
+{
+ // Compute the IMC SNC using lookup tables.
+ unsigned int imc_num;
+ int snc_nodes = snc_nodes_per_l3_cache();
+
+ if (snc_nodes <= 1)
+ return 0;
+
+ pthread_once(&imc_snc_map_once, init_snc_map);
+
/* Compute SNC for PMU. */
if (sscanf(pmu->name, "uncore_imc_%u", &imc_num) != 1) {
pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
return 0;
}
- if (imc_num >= snc_map_len) {
- pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
+
+ if (!cached_imc_snc_map)
return 0;
- }
- return snc_map[imc_num];
+
+ return cached_imc_snc_map[imc_num % cached_imc_snc_map_len];
}
static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
@@ -200,7 +248,9 @@ static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
return cpu_adjust[pmu_snc];
}
-static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
+static pthread_mutex_t pmu_adjust_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
{
// With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
// topology. For example, a two socket graniterapids machine may be set
@@ -231,9 +281,11 @@ static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool
return;
}
+ pthread_mutex_lock(&pmu_adjust_mutex);
+
pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu);
if (pmu_snc == 0) {
- // No adjustment necessary for the first SNC.
+ pthread_mutex_unlock(&pmu_adjust_mutex);
return;
}
@@ -242,8 +294,10 @@ static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool
// Hold onto the perf_cpu_map globally to avoid recomputation.
cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc);
adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus));
- if (!adjusted[pmu_snc])
+ if (!adjusted[pmu_snc]) {
+ pthread_mutex_unlock(&pmu_adjust_mutex);
return;
+ }
}
perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) {
@@ -263,6 +317,8 @@ static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool
perf_cpu_map__put(pmu->cpus);
pmu->cpus = perf_cpu_map__get(adjusted[pmu_snc]);
+
+ pthread_mutex_unlock(&pmu_adjust_mutex);
}
void perf_pmu__arch_init(struct perf_pmu *pmu)
@@ -300,11 +356,16 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
pmu->mem_events = perf_mem_events_intel_aux;
else
pmu->mem_events = perf_mem_events_intel;
- } else if (x86__is_intel_graniterapids()) {
- if (strstarts(pmu->name, "uncore_cha_"))
- gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
- else if (strstarts(pmu->name, "uncore_imc_"))
- gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+ } else if (x86__is_snc_supported()) {
+ int snc_nodes = snc_nodes_per_l3_cache();
+
+ if (snc_nodes == 2 || snc_nodes == 3) {
+ if (strstarts(pmu->name, "uncore_cha_"))
+ uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
+ else if (strstarts(pmu->name, "uncore_imc_") &&
+ !strstarts(pmu->name, "uncore_imc_free_running"))
+ uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+ }
}
}
}
--
2.54.0.823.g6e5bcc1fc9-goog
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH RESEND v7 2/2] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Sapphire Rapids and Emerald Rapids
2026-05-27 22:19 [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms Chun-Tse Shao
@ 2026-05-27 22:19 ` Chun-Tse Shao
2026-05-28 0:11 ` [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms sashiko-bot
1 sibling, 0 replies; 5+ messages in thread
From: Chun-Tse Shao @ 2026-05-27 22:19 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Namhyung Kim
Cc: Peter Zijlstra, Ingo Molnar, Mark Rutland, Alexander Shishkin,
Jiri Olsa, Ian Rogers, Adrian Hunter, James Clark, Zide Chen,
linux-perf-users, linux-kernel, Chun-Tse Shao
Similar to Granite Rapids (GNR) [1], Sapphire Rapids and Emerald Rapids
support sub-NUMA clusters as well. Adjust cpumasks using the same logic as
GNR in [1].
Tested on Emerald Rapids with SNC2 enabled:
$ perf stat --per-node -e 'UNC_CHA_CLOCKTICKS,UNC_M_CLOCKTICKS' -a -- sleep 1
Performance counter stats for 'system wide':
N0 30 72125876670 UNC_CHA_CLOCKTICKS
N0 4 8815163648 UNC_M_CLOCKTICKS
N1 30 72124958844 UNC_CHA_CLOCKTICKS
N1 4 8815014974 UNC_M_CLOCKTICKS
N2 30 72121049022 UNC_CHA_CLOCKTICKS
N2 4 8814592626 UNC_M_CLOCKTICKS
N3 30 72117133854 UNC_CHA_CLOCKTICKS
N3 4 8814012840 UNC_M_CLOCKTICKS
1.001574118 seconds time elapsed
[1] lore.kernel.org/20250515181417.491401-1-irogers@google.com
Reviewed-by: Zide Chen <zide.chen@intel.com>
Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Chun-Tse Shao <ctshao@google.com>
Assisted-by: Gemini:gemini-3.1-pro-preview
---
tools/perf/arch/x86/util/pmu.c | 13 +++++++++++--
1 file changed, 11 insertions(+), 2 deletions(-)
diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 9b00d5720fb7..2c24ef3140da 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -23,6 +23,8 @@
#include "util/env.h"
#include "util/header.h"
+#define GENUINE_INTEL_SPR "GenuineIntel-6-8F"
+#define GENUINE_INTEL_EMR "GenuineIntel-6-CF"
#define GENUINE_INTEL_GNR "GenuineIntel-6-A[DE]"
static bool cached_snc_supported;
@@ -30,8 +32,10 @@ static pthread_once_t snc_support_once = PTHREAD_ONCE_INIT;
static void init_snc_support(void)
{
- /* Graniterapids supports SNC configuration. */
+ /* Sapphirerapids Emeraldrapids Graniterapids support SNC configuration. */
static const char *const supported_cpuids[] = {
+ GENUINE_INTEL_SPR, /* Sapphirerapids */
+ GENUINE_INTEL_EMR, /* Emeraldrapids */
GENUINE_INTEL_GNR, /* Graniterapids */
};
char *cpuid = get_cpuid_str((struct perf_cpu){0});
@@ -161,6 +165,7 @@ static void init_snc_map(void)
{
int snc_nodes = snc_nodes_per_l3_cache();
char *cpuid;
+ static const u8 spr_emr_snc2_map[] = { 0, 0, 1, 1 };
static const u8 gnr_snc2_map[] = { 1, 1, 0, 0 };
static const u8 snc3_map[] = { 1, 1, 0, 0, 2, 2 };
@@ -168,7 +173,11 @@ static void init_snc_map(void)
case 2:
cpuid = get_cpuid_str((struct perf_cpu){ 0 });
if (cpuid) {
- if (strcmp_cpuid_str(GENUINE_INTEL_GNR, cpuid) == 0) {
+ if (strcmp_cpuid_str(GENUINE_INTEL_SPR, cpuid) == 0 ||
+ strcmp_cpuid_str(GENUINE_INTEL_EMR, cpuid) == 0) {
+ cached_imc_snc_map = spr_emr_snc2_map;
+ cached_imc_snc_map_len = ARRAY_SIZE(spr_emr_snc2_map);
+ } else if (strcmp_cpuid_str(GENUINE_INTEL_GNR, cpuid) == 0) {
cached_imc_snc_map = gnr_snc2_map;
cached_imc_snc_map_len = ARRAY_SIZE(gnr_snc2_map);
}
--
2.54.0.823.g6e5bcc1fc9-goog
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
2026-05-27 22:19 [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms Chun-Tse Shao
2026-05-27 22:19 ` [PATCH RESEND v7 2/2] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Sapphire Rapids and Emerald Rapids Chun-Tse Shao
@ 2026-05-28 0:11 ` sashiko-bot
2026-05-28 21:31 ` Chun-Tse Shao
1 sibling, 1 reply; 5+ messages in thread
From: sashiko-bot @ 2026-05-28 0:11 UTC (permalink / raw)
To: Chun-Tse Shao; +Cc: linux-perf-users
Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
Pre-existing issues:
- [Critical] An integer overflow in `uncore_cha_snc()` can result in a negative Sub-NUMA Clustering (SNC) node index. This bypasses the upper-bound check against `MAX_SNCS` and causes subsequent out-of-bounds array accesses.
--
commit be394e8b7ddbe730c3fc40fb608337fe19c7b33b
Author: Chun-Tse Shao <ctshao@google.com>
perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
This commit generalizes the Sub-NUMA Clustering (SNC) logic previously
specific to Granite Rapids to support multiple Intel platforms. It uses
CPUID to determine SNC support and caches various PMU configurations
to improve thread safety.
> diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> index 7c9d238922a6c..9b00d5720fb7a 100644
> --- a/tools/perf/arch/x86/util/pmu.c
> +++ b/tools/perf/arch/x86/util/pmu.c
[ ... ]
> @@ -121,46 +142,73 @@ static int uncore_cha_snc(struct perf_pmu *pmu)
> return 0;
> }
> chas_per_node = num_cha / snc_nodes;
> + if (chas_per_node == 0) {
> + pr_warning("Unexpected: chas_per_node is 0 (num_cha=%d, snc_nodes=%d)\n",
> + num_cha, snc_nodes);
> + return 0;
> + }
> cha_snc = cha_num / chas_per_node;
>
> /* Range check cha_snc. for unexpected out of bounds. */
> return cha_snc >= MAX_SNCS ? 0 : cha_snc;
[Severity: Critical]
This is a pre-existing issue, but does this range check safely handle very
large values for cha_num?
Since cha_num is parsed as an unsigned int via sscanf() from the PMU name,
and cha_snc is a signed int, a sufficiently large cha_num could cause the
division result to overflow into a negative value.
If cha_snc becomes negative, the cha_snc >= MAX_SNCS check evaluates to
false, allowing the function to return a negative index.
Could this negative index then cause out-of-bounds array accesses when used
later in uncore_cha_imc_compute_cpu_adjust()?
uncore_cha_imc_compute_cpu_adjust() {
static bool checked_cpu_adjust[MAX_SNCS];
...
if (checked_cpu_adjust[pmu_snc])
...
}
--
Sashiko AI review · https://sashiko.dev/#/patchset/20260527221934.3830896-1-ctshao@google.com?part=1
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
2026-05-28 0:11 ` [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms sashiko-bot
@ 2026-05-28 21:31 ` Chun-Tse Shao
2026-06-09 18:44 ` Chun-Tse Shao
0 siblings, 1 reply; 5+ messages in thread
From: Chun-Tse Shao @ 2026-05-28 21:31 UTC (permalink / raw)
To: sashiko-reviews; +Cc: linux-perf-users
On Wed, May 27, 2026 at 5:11 PM <sashiko-bot@kernel.org> wrote:
>
> Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
>
> Pre-existing issues:
> - [Critical] An integer overflow in `uncore_cha_snc()` can result in a negative Sub-NUMA Clustering (SNC) node index. This bypasses the upper-bound check against `MAX_SNCS` and causes subsequent out-of-bounds array accesses.
> --
>
> commit be394e8b7ddbe730c3fc40fb608337fe19c7b33b
> Author: Chun-Tse Shao <ctshao@google.com>
>
> perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
>
> This commit generalizes the Sub-NUMA Clustering (SNC) logic previously
> specific to Granite Rapids to support multiple Intel platforms. It uses
> CPUID to determine SNC support and caches various PMU configurations
> to improve thread safety.
>
> > diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> > index 7c9d238922a6c..9b00d5720fb7a 100644
> > --- a/tools/perf/arch/x86/util/pmu.c
> > +++ b/tools/perf/arch/x86/util/pmu.c
>
> [ ... ]
>
> > @@ -121,46 +142,73 @@ static int uncore_cha_snc(struct perf_pmu *pmu)
> > return 0;
> > }
> > chas_per_node = num_cha / snc_nodes;
> > + if (chas_per_node == 0) {
> > + pr_warning("Unexpected: chas_per_node is 0 (num_cha=%d, snc_nodes=%d)\n",
> > + num_cha, snc_nodes);
> > + return 0;
> > + }
> > cha_snc = cha_num / chas_per_node;
> >
> > /* Range check cha_snc. for unexpected out of bounds. */
> > return cha_snc >= MAX_SNCS ? 0 : cha_snc;
>
> [Severity: Critical]
> This is a pre-existing issue, but does this range check safely handle very
> large values for cha_num?
>
> Since cha_num is parsed as an unsigned int via sscanf() from the PMU name,
> and cha_snc is a signed int, a sufficiently large cha_num could cause the
> division result to overflow into a negative value.
I think it is very unlikely we would see cha_num > INT_MAX.
Since that is a pre-existing issue, I prefer to fix it separately.
Thanks,
CT
>
> If cha_snc becomes negative, the cha_snc >= MAX_SNCS check evaluates to
> false, allowing the function to return a negative index.
>
> Could this negative index then cause out-of-bounds array accesses when used
> later in uncore_cha_imc_compute_cpu_adjust()?
>
> uncore_cha_imc_compute_cpu_adjust() {
> static bool checked_cpu_adjust[MAX_SNCS];
> ...
> if (checked_cpu_adjust[pmu_snc])
> ...
> }
>
> --
> Sashiko AI review · https://sashiko.dev/#/patchset/20260527221934.3830896-1-ctshao@google.com?part=1
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
2026-05-28 21:31 ` Chun-Tse Shao
@ 2026-06-09 18:44 ` Chun-Tse Shao
0 siblings, 0 replies; 5+ messages in thread
From: Chun-Tse Shao @ 2026-06-09 18:44 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo, Ian Rogers, Namhyung Kim
Cc: linux-perf-users, Peter Zijlstra, Ingo Molnar, Mark Rutland,
Alexander Shishkin, Jiri Olsa, Adrian Hunter, James Clark,
Zide Chen, Linux Kernel Mailing List, Chun-Tse Shao
Ping for attention.
Thanks,
CT
On Thu, May 28, 2026 at 2:31 PM Chun-Tse Shao <ctshao@google.com> wrote:
>
> On Wed, May 27, 2026 at 5:11 PM <sashiko-bot@kernel.org> wrote:
> >
> > Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider:
> >
> > Pre-existing issues:
> > - [Critical] An integer overflow in `uncore_cha_snc()` can result in a negative Sub-NUMA Clustering (SNC) node index. This bypasses the upper-bound check against `MAX_SNCS` and causes subsequent out-of-bounds array accesses.
> > --
> >
> > commit be394e8b7ddbe730c3fc40fb608337fe19c7b33b
> > Author: Chun-Tse Shao <ctshao@google.com>
> >
> > perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
> >
> > This commit generalizes the Sub-NUMA Clustering (SNC) logic previously
> > specific to Granite Rapids to support multiple Intel platforms. It uses
> > CPUID to determine SNC support and caches various PMU configurations
> > to improve thread safety.
> >
> > > diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> > > index 7c9d238922a6c..9b00d5720fb7a 100644
> > > --- a/tools/perf/arch/x86/util/pmu.c
> > > +++ b/tools/perf/arch/x86/util/pmu.c
> >
> > [ ... ]
> >
> > > @@ -121,46 +142,73 @@ static int uncore_cha_snc(struct perf_pmu *pmu)
> > > return 0;
> > > }
> > > chas_per_node = num_cha / snc_nodes;
> > > + if (chas_per_node == 0) {
> > > + pr_warning("Unexpected: chas_per_node is 0 (num_cha=%d, snc_nodes=%d)\n",
> > > + num_cha, snc_nodes);
> > > + return 0;
> > > + }
> > > cha_snc = cha_num / chas_per_node;
> > >
> > > /* Range check cha_snc. for unexpected out of bounds. */
> > > return cha_snc >= MAX_SNCS ? 0 : cha_snc;
> >
> > [Severity: Critical]
> > This is a pre-existing issue, but does this range check safely handle very
> > large values for cha_num?
> >
> > Since cha_num is parsed as an unsigned int via sscanf() from the PMU name,
> > and cha_snc is a signed int, a sufficiently large cha_num could cause the
> > division result to overflow into a negative value.
>
> I think it is very unlikely we would see cha_num > INT_MAX.
> Since that is a pre-existing issue, I prefer to fix it separately.
>
> Thanks,
> CT
>
> >
> > If cha_snc becomes negative, the cha_snc >= MAX_SNCS check evaluates to
> > false, allowing the function to return a negative index.
> >
> > Could this negative index then cause out-of-bounds array accesses when used
> > later in uncore_cha_imc_compute_cpu_adjust()?
> >
> > uncore_cha_imc_compute_cpu_adjust() {
> > static bool checked_cpu_adjust[MAX_SNCS];
> > ...
> > if (checked_cpu_adjust[pmu_snc])
> > ...
> > }
> >
> > --
> > Sashiko AI review · https://sashiko.dev/#/patchset/20260527221934.3830896-1-ctshao@google.com?part=1
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2026-06-09 18:44 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2026-05-27 22:19 [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms Chun-Tse Shao
2026-05-27 22:19 ` [PATCH RESEND v7 2/2] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Sapphire Rapids and Emerald Rapids Chun-Tse Shao
2026-05-28 0:11 ` [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms sashiko-bot
2026-05-28 21:31 ` Chun-Tse Shao
2026-06-09 18:44 ` Chun-Tse Shao
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox