Linux Perf Users
 help / color / mirror / Atom feed
From: Chun-Tse Shao <ctshao@google.com>
To: Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	 Mark Rutland <mark.rutland@arm.com>,
	 Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@kernel.org>,  Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	 James Clark <james.clark@linaro.org>,
	Zide Chen <zide.chen@intel.com>,
	 linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	 Chun-Tse Shao <ctshao@google.com>
Subject: [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms
Date: Wed, 27 May 2026 15:19:33 -0700	[thread overview]
Message-ID: <20260527221934.3830896-1-ctshao@google.com> (raw)

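Prepare for supporting more Intel platforms with sub-NUMA clustering (SNC)
by generalizing the logic that is currently specific to Granite Rapids
(GNR): the GNR-only CPUID check becomes a table of supported CPUIDs, and
the gnr_ prefix is dropped from the uncore cpumask adjustment helper.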

Reviewed-by: Zide Chen <zide.chen@intel.com>
Signed-off-by: Chun-Tse Shao <ctshao@google.com>
Assisted-by: Gemini:gemini-3.1-pro-preview
---
v7:
  Fixed based on Sashiko review:
  - Refactor PMU initialization to be thread-safe: use pthread_once()
    for the one-time cached probes and a pthread mutex around the
    uncore cpumask adjustment.
  - Fix a critical bounds check bug on single-socket systems.
  - Fix potential divide-by-zero crash in uncore_cha_snc.
  - Avoid spurious warnings on non-SNC systems.
  - Avoid silent and inconsistent uncore mappings on unsupported SNC
    configurations.
  - Resolve checkpatch linter warnings.

v6: lore.kernel.org/20260515172710.428474-1-ctshao@google.com
  Make string literal.
  Add SPR into SNC2.

v5: lore.kernel.org/20260407203918.3178481-1-ctshao@google.com
  Split patch.

v4: lore.kernel.org/20260402205300.1953706-1-ctshao@google.com
  Rebase.

v3: lore.kernel.org/20260212223942.3832857-1-ctshao@google.com
  Fix a typo.

v2: lore.kernel.org/20260205232220.1980168-1-ctshao@google.com
  Split EMR and GNR in the SNC2 IMC cpu map.

v1: lore.kernel.org/20260108184430.1210223-1-ctshao@google.com

 tools/perf/arch/x86/util/pmu.c | 199 +++++++++++++++++++++------------
 1 file changed, 130 insertions(+), 69 deletions(-)

diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index 7c9d238922a6..9b00d5720fb7 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
 #include <string.h>
 #include <stdio.h>
 #include <sys/types.h>
@@ -22,20 +23,31 @@
 #include "util/env.h"
 #include "util/header.h"

-static bool x86__is_intel_graniterapids(void)
-{
-	static bool checked_if_graniterapids;
-	static bool is_graniterapids;
+#define GENUINE_INTEL_GNR "GenuineIntel-6-A[DE]"

-	if (!checked_if_graniterapids) {
-		const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
-		char *cpuid = get_cpuid_str((struct perf_cpu){0});
+static bool cached_snc_supported;
+static pthread_once_t snc_support_once = PTHREAD_ONCE_INIT;

-		is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
-		free(cpuid);
-		checked_if_graniterapids = true;
+static void init_snc_support(void)
+{
+	/* Graniterapids supports SNC configuration. */
+	static const char *const supported_cpuids[] = {
+		GENUINE_INTEL_GNR, /* Graniterapids */
+	};
+	char *cpuid = get_cpuid_str((struct perf_cpu){0});
+
+	for (size_t i = 0; i < ARRAY_SIZE(supported_cpuids); i++) {
+		cached_snc_supported = cpuid && strcmp_cpuid_str(supported_cpuids[i], cpuid) == 0;
+		if (cached_snc_supported)
+			break;
 	}
-	return is_graniterapids;
+	free(cpuid);
+}
+
+static bool x86__is_snc_supported(void)
+{
+	pthread_once(&snc_support_once, init_snc_support);
+	return cached_snc_supported;
 }

 static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
@@ -52,49 +64,58 @@ static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
 	return cpus;
 }

-static int snc_nodes_per_l3_cache(void)
+static int cached_snc_nodes;
+static pthread_once_t snc_nodes_once = PTHREAD_ONCE_INIT;
+
+static void init_snc_nodes(void)
 {
-	static bool checked_snc;
-	static int snc_nodes;
-
-	if (!checked_snc) {
-		struct perf_cpu_map *node_cpus =
-			read_sysfs_cpu_map("devices/system/node/node0/cpulist");
-		struct perf_cpu_map *cache_cpus =
-			read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
-
-		snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
-		perf_cpu_map__put(cache_cpus);
-		perf_cpu_map__put(node_cpus);
-		checked_snc = true;
-	}
-	return snc_nodes;
+	struct perf_cpu_map *node_cpus =
+		read_sysfs_cpu_map("devices/system/node/node0/cpulist");
+	struct perf_cpu_map *cache_cpus =
+		read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
+
+	if (node_cpus && cache_cpus)
+		cached_snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
+	else
+		cached_snc_nodes = 0;
+	perf_cpu_map__put(cache_cpus);
+	perf_cpu_map__put(node_cpus);
 }

-static int num_chas(void)
+static int snc_nodes_per_l3_cache(void)
 {
-	static bool checked_chas;
-	static int num_chas;
+	pthread_once(&snc_nodes_once, init_snc_nodes);
+	return cached_snc_nodes;
+}

-	if (!checked_chas) {
-		int fd = perf_pmu__event_source_devices_fd();
-		struct io_dir dir;
-		struct io_dirent64 *dent;
+static int cached_num_chas;
+static pthread_once_t num_chas_once = PTHREAD_ONCE_INIT;

-		if (fd < 0)
-			return -1;
+static void init_num_chas(void)
+{
+	int fd = perf_pmu__event_source_devices_fd();
+	struct io_dir dir;
+	struct io_dirent64 *dent;

-		io_dir__init(&dir, fd);
+	if (fd < 0) {
+		cached_num_chas = -1;
+		return;
+	}

-		while ((dent = io_dir__readdir(&dir)) != NULL) {
-			/* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
-			if (strstarts(dent->d_name, "uncore_cha_"))
-				num_chas++;
-		}
-		close(fd);
-		checked_chas = true;
+	io_dir__init(&dir, fd);
+
+	while ((dent = io_dir__readdir(&dir)) != NULL) {
+		/* Note, dent->d_type will be DT_LNK and so isn't a useful filter. */
+		if (strstarts(dent->d_name, "uncore_cha_"))
+			cached_num_chas++;
 	}
-	return num_chas;
+	close(fd);
+}
+
+static int num_chas(void)
+{
+	pthread_once(&num_chas_once, init_num_chas);
+	return cached_num_chas;
 }

 #define MAX_SNCS 6
@@ -121,46 +142,73 @@ static int uncore_cha_snc(struct perf_pmu *pmu)
 		return 0;
 	}
 	chas_per_node = num_cha / snc_nodes;
+	if (chas_per_node == 0) {
+		pr_warning("Unexpected: chas_per_node is 0 (num_cha=%d, snc_nodes=%d)\n",
+			   num_cha, snc_nodes);
+		return 0;
+	}
 	cha_snc = cha_num / chas_per_node;

 	/* Range check cha_snc. for unexpected out of bounds. */
 	return cha_snc >= MAX_SNCS ? 0 : cha_snc;
 }

-static int uncore_imc_snc(struct perf_pmu *pmu)
+static const u8 *cached_imc_snc_map;
+static size_t cached_imc_snc_map_len;
+static pthread_once_t imc_snc_map_once = PTHREAD_ONCE_INIT;
+
+static void init_snc_map(void)
 {
-	// Compute the IMC SNC using lookup tables.
-	unsigned int imc_num;
 	int snc_nodes = snc_nodes_per_l3_cache();
-	const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
-	const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
-	const u8 *snc_map;
-	size_t snc_map_len;
+	char *cpuid;
+	static const u8 gnr_snc2_map[] = { 1, 1, 0, 0 };
+	static const u8 snc3_map[] = { 1, 1, 0, 0, 2, 2 };

 	switch (snc_nodes) {
 	case 2:
-		snc_map = snc2_map;
-		snc_map_len = ARRAY_SIZE(snc2_map);
+		cpuid = get_cpuid_str((struct perf_cpu){ 0 });
+		if (cpuid) {
+			if (strcmp_cpuid_str(GENUINE_INTEL_GNR, cpuid) == 0) {
+				cached_imc_snc_map = gnr_snc2_map;
+				cached_imc_snc_map_len = ARRAY_SIZE(gnr_snc2_map);
+			}
+			free(cpuid);
+		}
 		break;
 	case 3:
-		snc_map = snc3_map;
-		snc_map_len = ARRAY_SIZE(snc3_map);
+		cached_imc_snc_map = snc3_map;
+		cached_imc_snc_map_len = ARRAY_SIZE(snc3_map);
 		break;
 	default:
 		/* Error or no lookup support for SNC with >3 nodes. */
-		return 0;
+		break;
 	}

+	if (!cached_imc_snc_map)
+		pr_warning("Unexpected: can not find snc map config\n");
+}
+
+static int uncore_imc_snc(struct perf_pmu *pmu)
+{
+	// Compute the IMC SNC using lookup tables.
+	unsigned int imc_num;
+	int snc_nodes = snc_nodes_per_l3_cache();
+
+	if (snc_nodes <= 1)
+		return 0;
+
+	pthread_once(&imc_snc_map_once, init_snc_map);
+
 	/* Compute SNC for PMU. */
 	if (sscanf(pmu->name, "uncore_imc_%u", &imc_num) != 1) {
 		pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
 		return 0;
 	}
-	if (imc_num >= snc_map_len) {
-		pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
+
+	if (!cached_imc_snc_map)
 		return 0;
-	}
-	return snc_map[imc_num];
+
+	return cached_imc_snc_map[imc_num % cached_imc_snc_map_len];
 }

 static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
@@ -200,7 +248,9 @@ static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
 	return cpu_adjust[pmu_snc];
 }

-static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
+static pthread_mutex_t pmu_adjust_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
 {
 	// With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
 	// topology. For example, a two socket graniterapids machine may be set
@@ -231,9 +281,11 @@ static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool
 		return;
 	}

+	pthread_mutex_lock(&pmu_adjust_mutex);
+
 	pmu_snc = cha ? uncore_cha_snc(pmu) : uncore_imc_snc(pmu);
 	if (pmu_snc == 0) {
-		// No adjustment necessary for the first SNC.
+		pthread_mutex_unlock(&pmu_adjust_mutex);
 		return;
 	}

@@ -242,8 +294,10 @@ static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool
 		// Hold onto the perf_cpu_map globally to avoid recomputation.
 		cpu_adjust = uncore_cha_imc_compute_cpu_adjust(pmu_snc);
 		adjusted[pmu_snc] = perf_cpu_map__empty_new(perf_cpu_map__nr(pmu->cpus));
-		if (!adjusted[pmu_snc])
+		if (!adjusted[pmu_snc]) {
+			pthread_mutex_unlock(&pmu_adjust_mutex);
 			return;
+		}
 	}

 	perf_cpu_map__for_each_cpu(cpu, idx, pmu->cpus) {
@@ -263,6 +317,8 @@ static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool

 	perf_cpu_map__put(pmu->cpus);
 	pmu->cpus = perf_cpu_map__get(adjusted[pmu_snc]);
+
+	pthread_mutex_unlock(&pmu_adjust_mutex);
 }

 void perf_pmu__arch_init(struct perf_pmu *pmu)
@@ -300,11 +356,16 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
 				pmu->mem_events = perf_mem_events_intel_aux;
 			else
 				pmu->mem_events = perf_mem_events_intel;
-		} else if (x86__is_intel_graniterapids()) {
-			if (strstarts(pmu->name, "uncore_cha_"))
-				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
-			else if (strstarts(pmu->name, "uncore_imc_"))
-				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+		} else if (x86__is_snc_supported()) {
+			int snc_nodes = snc_nodes_per_l3_cache();
+
+			if (snc_nodes == 2 || snc_nodes == 3) {
+				if (strstarts(pmu->name, "uncore_cha_"))
+					uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
+				else if (strstarts(pmu->name, "uncore_imc_") &&
+					 !strstarts(pmu->name, "uncore_imc_free_running"))
+					uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+			}
 		}
 	}
 }
--
2.54.0.823.g6e5bcc1fc9-goog


             reply	other threads:[~2026-05-27 22:19 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-27 22:19 Chun-Tse Shao [this message]
2026-05-27 22:19 ` [PATCH RESEND v7 2/2] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Sapphire Rapids and Emerald Rapids Chun-Tse Shao
2026-05-28  0:11 ` [PATCH RESEND v7 1/2] perf pmu intel: Generalize SNC cpumask adjustment for multiple platforms sashiko-bot
2026-05-28 21:31   ` Chun-Tse Shao
2026-06-09 18:44     ` Chun-Tse Shao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260527221934.3830896-1-ctshao@google.com \
    --to=ctshao@google.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=irogers@google.com \
    --cc=james.clark@linaro.org \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=zide.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox