public inbox for linux-perf-users@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Emeraldrapids
@ 2026-02-12 22:39 Chun-Tse Shao
  2026-02-13  0:08 ` Chen, Zide
  0 siblings, 1 reply; 5+ messages in thread
From: Chun-Tse Shao @ 2026-02-12 22:39 UTC (permalink / raw)
  To: linux-kernel
  Cc: Chun-Tse Shao, peterz, mingo, acme, namhyung, mark.rutland,
	alexander.shishkin, jolsa, irogers, adrian.hunter, james.clark,
	ravi.bangoria, linux-perf-users

Similar to GNR [1], Emeraldrapids also supports sub-NUMA clusters (SNC).
Adjust the cpumasks using the same logic as for GNR in [1].

Tested on Emeraldrapids with SNC2 enabled:
  $ perf stat --per-node -e 'UNC_CHA_CLOCKTICKS,UNC_M_CLOCKTICKS' -a -- sleep 1

   Performance counter stats for 'system wide':

  N0       30        72125876670      UNC_CHA_CLOCKTICKS
  N0        4         8815163648      UNC_M_CLOCKTICKS
  N1       30        72124958844      UNC_CHA_CLOCKTICKS
  N1        4         8815014974      UNC_M_CLOCKTICKS
  N2       30        72121049022      UNC_CHA_CLOCKTICKS
  N2        4         8814592626      UNC_M_CLOCKTICKS
  N3       30        72117133854      UNC_CHA_CLOCKTICKS
  N3        4         8814012840      UNC_M_CLOCKTICKS

         1.001574118 seconds time elapsed

[1] lore.kernel.org/20250515181417.491401-1-irogers@google.com

Signed-off-by: Chun-Tse Shao <ctshao@google.com>
---
v3:
  Fix a typo.

v2: lore.kernel.org/20260205232220.1980168-1-ctshao@google.com
  Split EMR and GNR in the SNC2 IMC cpu map.

v1: lore.kernel.org/20260108184430.1210223-1-ctshao@google.com

 tools/perf/arch/x86/util/pmu.c | 95 ++++++++++++++++++++++------------
 1 file changed, 63 insertions(+), 32 deletions(-)

diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
index a3f96221758d..2215984349f5 100644
--- a/tools/perf/arch/x86/util/pmu.c
+++ b/tools/perf/arch/x86/util/pmu.c
@@ -22,20 +22,29 @@
 #include "util/env.h"
 #include "util/header.h"

-static bool x86__is_intel_graniterapids(void)
+static bool x86__is_snc_supported(void)
 {
-	static bool checked_if_graniterapids;
-	static bool is_graniterapids;
+	static bool checked_if_snc_supported;
+	static bool is_supported;

-	if (!checked_if_graniterapids) {
-		const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
+	if (!checked_if_snc_supported) {
+
+		/* Emeraldrapids and Graniterapids support SNC configuration. */
+		static const char *const supported_cpuids[] = {
+			"GenuineIntel-6-CF", /* Emeraldrapids */
+			"GenuineIntel-6-A[DE]", /* Graniterapids */
+		};
 		char *cpuid = get_cpuid_str((struct perf_cpu){0});

-		is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
+		for (size_t i = 0; i < ARRAY_SIZE(supported_cpuids); i++) {
+			is_supported = cpuid && strcmp_cpuid_str(supported_cpuids[i], cpuid) == 0;
+			if (is_supported)
+				break;
+		}
 		free(cpuid);
-		checked_if_graniterapids = true;
+		checked_if_snc_supported = true;
 	}
-	return is_graniterapids;
+	return is_supported;
 }

 static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
@@ -64,6 +73,7 @@ static int snc_nodes_per_l3_cache(void)
 			read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");

 		snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
+
 		perf_cpu_map__put(cache_cpus);
 		perf_cpu_map__put(node_cpus);
 		checked_snc = true;
@@ -137,23 +147,42 @@ static int uncore_imc_snc(struct perf_pmu *pmu)
 	// Compute the IMC SNC using lookup tables.
 	unsigned int imc_num;
 	int snc_nodes = snc_nodes_per_l3_cache();
-	const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
-	const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
-	const u8 *snc_map;
-	size_t snc_map_len;
-
-	switch (snc_nodes) {
-	case 2:
-		snc_map = snc2_map;
-		snc_map_len = ARRAY_SIZE(snc2_map);
-		break;
-	case 3:
-		snc_map = snc3_map;
-		snc_map_len = ARRAY_SIZE(snc3_map);
-		break;
-	default:
-		/* Error or no lookup support for SNC with >3 nodes. */
-		return 0;
+	char *cpuid;
+	static const u8 emr_snc2_map[] = { 0, 0, 1, 1 };
+	static const u8 gnr_snc2_map[] = { 1, 1, 0, 0 };
+	static const u8 snc3_map[] = { 1, 1, 0, 0, 2, 2 };
+	static const u8 *snc_map;
+	static size_t snc_map_len;
+
+	/* snc_map is not inited yet. We only look up once to avoid expensive operations. */
+	if (!snc_map) {
+		switch (snc_nodes) {
+		case 2:
+			cpuid = get_cpuid_str((struct perf_cpu){ 0 });
+			if (cpuid) {
+				if (strcmp_cpuid_str("GenuineIntel-6-CF", cpuid) == 0) {
+					snc_map = emr_snc2_map;
+					snc_map_len = ARRAY_SIZE(emr_snc2_map);
+				} else if (strcmp_cpuid_str("GenuineIntel-6-A[DE]", cpuid) == 0) {
+					snc_map = gnr_snc2_map;
+					snc_map_len = ARRAY_SIZE(gnr_snc2_map);
+				}
+				free(cpuid);
+			}
+			break;
+		case 3:
+			snc_map = snc3_map;
+			snc_map_len = ARRAY_SIZE(snc3_map);
+			break;
+		default:
+			/* Error or no lookup support for SNC with >3 nodes. */
+			return 0;
+		}
+
+		if (!snc_map) {
+			pr_warning("Unexpected: can not find snc map config");
+			return 0;
+		}
 	}

 	/* Compute SNC for PMU. */
@@ -161,11 +190,12 @@ static int uncore_imc_snc(struct perf_pmu *pmu)
 		pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
 		return 0;
 	}
-	if (imc_num >= snc_map_len) {
+	if (imc_num >= snc_map_len * perf_cpu_map__nr(pmu->cpus)) {
 		pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
 		return 0;
 	}
-	return snc_map[imc_num];
+
+	return snc_map[imc_num % snc_map_len];
 }

 static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
@@ -205,7 +235,7 @@ static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
 	return cpu_adjust[pmu_snc];
 }

-static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
+static void uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
 {
 	// With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
 	// topology. For example, a two socket graniterapids machine may be set
@@ -304,11 +334,12 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
 				pmu->mem_events = perf_mem_events_intel_aux;
 			else
 				pmu->mem_events = perf_mem_events_intel;
-		} else if (x86__is_intel_graniterapids()) {
+		} else if (x86__is_snc_supported()) {
 			if (starts_with(pmu->name, "uncore_cha_"))
-				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
-			else if (starts_with(pmu->name, "uncore_imc_"))
-				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
+				uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
+			else if (starts_with(pmu->name, "uncore_imc_") &&
+				 !starts_with(pmu->name, "uncore_imc_free_running"))
+				uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
 		}
 	}
 }
--
2.53.0.273.g2a3d683680-goog


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Emeraldrapids
  2026-02-12 22:39 [PATCH v3] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Emeraldrapids Chun-Tse Shao
@ 2026-02-13  0:08 ` Chen, Zide
  2026-04-01 16:38   ` Ian Rogers
  0 siblings, 1 reply; 5+ messages in thread
From: Chen, Zide @ 2026-02-13  0:08 UTC (permalink / raw)
  To: Chun-Tse Shao, linux-kernel
  Cc: peterz, mingo, acme, namhyung, mark.rutland, alexander.shishkin,
	jolsa, irogers, adrian.hunter, james.clark, ravi.bangoria,
	linux-perf-users



On 2/12/2026 2:39 PM, Chun-Tse Shao wrote:
> Similar to GNR [1], Emeraldrapids supports sub-NUMA clusters as well.
> Adjust cpumasks as the logic for GNR in [1].
> 
> Tested on Emeraldrapids with SNC2 enabled:
>   $ perf stat --per-node -e 'UNC_CHA_CLOCKTICKS,UNC_M_CLOCKTICKS' -a -- sleep 1
> 
>    Performance counter stats for 'system wide':
> 
>   N0       30        72125876670      UNC_CHA_CLOCKTICKS
>   N0        4         8815163648      UNC_M_CLOCKTICKS
>   N1       30        72124958844      UNC_CHA_CLOCKTICKS
>   N1        4         8815014974      UNC_M_CLOCKTICKS
>   N2       30        72121049022      UNC_CHA_CLOCKTICKS
>   N2        4         8814592626      UNC_M_CLOCKTICKS
>   N3       30        72117133854      UNC_CHA_CLOCKTICKS
>   N3        4         8814012840      UNC_M_CLOCKTICKS
> 
>          1.001574118 seconds time elapsed
> 
> [1] lore.kernel.org/20250515181417.491401-1-irogers@google.com
> 
> Signed-off-by: Chun-Tse Shao <ctshao@google.com>

LGTM.

Reviewed-by: Zide Chen <zide.chen@intel.com>

> ---
> v3:
>   Fix a typo.
> 
> v2: lore.kernel.org/20260205232220.1980168-1-ctshao@google.com
>   Split EMR and GNR in the SNC2 IMC cpu map.
> 
> v1: lore.kernel.org/20260108184430.1210223-1-ctshao@google.com
> 
>  tools/perf/arch/x86/util/pmu.c | 95 ++++++++++++++++++++++------------
>  1 file changed, 63 insertions(+), 32 deletions(-)
> 
> diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> index a3f96221758d..2215984349f5 100644
> --- a/tools/perf/arch/x86/util/pmu.c
> +++ b/tools/perf/arch/x86/util/pmu.c
> @@ -22,20 +22,29 @@
>  #include "util/env.h"
>  #include "util/header.h"
> 
> -static bool x86__is_intel_graniterapids(void)
> +static bool x86__is_snc_supported(void)
>  {
> -	static bool checked_if_graniterapids;
> -	static bool is_graniterapids;
> +	static bool checked_if_snc_supported;
> +	static bool is_supported;
> 
> -	if (!checked_if_graniterapids) {
> -		const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
> +	if (!checked_if_snc_supported) {
> +
> +		/* Emeraldrapids and Graniterapids support SNC configuration. */
> +		static const char *const supported_cpuids[] = {
> +			"GenuineIntel-6-CF", /* Emeraldrapids */
> +			"GenuineIntel-6-A[DE]", /* Graniterapids */
> +		};
>  		char *cpuid = get_cpuid_str((struct perf_cpu){0});
> 
> -		is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
> +		for (size_t i = 0; i < ARRAY_SIZE(supported_cpuids); i++) {
> +			is_supported = cpuid && strcmp_cpuid_str(supported_cpuids[i], cpuid) == 0;
> +			if (is_supported)
> +				break;
> +		}
>  		free(cpuid);
> -		checked_if_graniterapids = true;
> +		checked_if_snc_supported = true;
>  	}
> -	return is_graniterapids;
> +	return is_supported;
>  }
> 
>  static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
> @@ -64,6 +73,7 @@ static int snc_nodes_per_l3_cache(void)
>  			read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
> 
>  		snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
> +
>  		perf_cpu_map__put(cache_cpus);
>  		perf_cpu_map__put(node_cpus);
>  		checked_snc = true;
> @@ -137,23 +147,42 @@ static int uncore_imc_snc(struct perf_pmu *pmu)
>  	// Compute the IMC SNC using lookup tables.
>  	unsigned int imc_num;
>  	int snc_nodes = snc_nodes_per_l3_cache();
> -	const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
> -	const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
> -	const u8 *snc_map;
> -	size_t snc_map_len;
> -
> -	switch (snc_nodes) {
> -	case 2:
> -		snc_map = snc2_map;
> -		snc_map_len = ARRAY_SIZE(snc2_map);
> -		break;
> -	case 3:
> -		snc_map = snc3_map;
> -		snc_map_len = ARRAY_SIZE(snc3_map);
> -		break;
> -	default:
> -		/* Error or no lookup support for SNC with >3 nodes. */
> -		return 0;
> +	char *cpuid;
> +	static const u8 emr_snc2_map[] = { 0, 0, 1, 1 };
> +	static const u8 gnr_snc2_map[] = { 1, 1, 0, 0 };
> +	static const u8 snc3_map[] = { 1, 1, 0, 0, 2, 2 };
> +	static const u8 *snc_map;
> +	static size_t snc_map_len;
> +
> +	/* snc_map is not inited yet. We only look up once to avoid expensive operations. */
> +	if (!snc_map) {
> +		switch (snc_nodes) {
> +		case 2:
> +			cpuid = get_cpuid_str((struct perf_cpu){ 0 });
> +			if (cpuid) {
> +				if (strcmp_cpuid_str("GenuineIntel-6-CF", cpuid) == 0) {
> +					snc_map = emr_snc2_map;
> +					snc_map_len = ARRAY_SIZE(emr_snc2_map);
> +				} else if (strcmp_cpuid_str("GenuineIntel-6-A[DE]", cpuid) == 0) {
> +					snc_map = gnr_snc2_map;
> +					snc_map_len = ARRAY_SIZE(gnr_snc2_map);
> +				}
> +				free(cpuid);
> +			}
> +			break;
> +		case 3:
> +			snc_map = snc3_map;
> +			snc_map_len = ARRAY_SIZE(snc3_map);
> +			break;
> +		default:
> +			/* Error or no lookup support for SNC with >3 nodes. */
> +			return 0;
> +		}
> +
> +		if (!snc_map) {
> +			pr_warning("Unexpected: can not find snc map config");
> +			return 0;
> +		}
>  	}
> 
>  	/* Compute SNC for PMU. */
> @@ -161,11 +190,12 @@ static int uncore_imc_snc(struct perf_pmu *pmu)
>  		pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
>  		return 0;
>  	}
> -	if (imc_num >= snc_map_len) {
> +	if (imc_num >= snc_map_len * perf_cpu_map__nr(pmu->cpus)) {
>  		pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
>  		return 0;
>  	}
> -	return snc_map[imc_num];
> +
> +	return snc_map[imc_num % snc_map_len];
>  }
> 
>  static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
> @@ -205,7 +235,7 @@ static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
>  	return cpu_adjust[pmu_snc];
>  }
> 
> -static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
> +static void uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
>  {
>  	// With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
>  	// topology. For example, a two socket graniterapids machine may be set
> @@ -304,11 +334,12 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
>  				pmu->mem_events = perf_mem_events_intel_aux;
>  			else
>  				pmu->mem_events = perf_mem_events_intel;
> -		} else if (x86__is_intel_graniterapids()) {
> +		} else if (x86__is_snc_supported()) {
>  			if (starts_with(pmu->name, "uncore_cha_"))
> -				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
> -			else if (starts_with(pmu->name, "uncore_imc_"))
> -				gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
> +				uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
> +			else if (starts_with(pmu->name, "uncore_imc_") &&
> +				 !starts_with(pmu->name, "uncore_imc_free_running"))
> +				uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
>  		}
>  	}
>  }
> --
> 2.53.0.273.g2a3d683680-goog
> 
> 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Emeraldrapids
  2026-02-13  0:08 ` Chen, Zide
@ 2026-04-01 16:38   ` Ian Rogers
  2026-04-02  0:07     ` Namhyung Kim
  0 siblings, 1 reply; 5+ messages in thread
From: Ian Rogers @ 2026-04-01 16:38 UTC (permalink / raw)
  To: Chen, Zide
  Cc: Chun-Tse Shao, linux-kernel, peterz, mingo, acme, namhyung,
	mark.rutland, alexander.shishkin, jolsa, adrian.hunter,
	james.clark, ravi.bangoria, linux-perf-users

On Thu, Feb 12, 2026 at 4:08 PM Chen, Zide <zide.chen@intel.com> wrote:
>
> On 2/12/2026 2:39 PM, Chun-Tse Shao wrote:
> > Similar to GNR [1], Emeraldrapids supports sub-NUMA clusters as well.
> > Adjust cpumasks as the logic for GNR in [1].
> >
> > Tested on Emeraldrapids with SNC2 enabled:
> >   $ perf stat --per-node -e 'UNC_CHA_CLOCKTICKS,UNC_M_CLOCKTICKS' -a -- sleep 1
> >
> >    Performance counter stats for 'system wide':
> >
> >   N0       30        72125876670      UNC_CHA_CLOCKTICKS
> >   N0        4         8815163648      UNC_M_CLOCKTICKS
> >   N1       30        72124958844      UNC_CHA_CLOCKTICKS
> >   N1        4         8815014974      UNC_M_CLOCKTICKS
> >   N2       30        72121049022      UNC_CHA_CLOCKTICKS
> >   N2        4         8814592626      UNC_M_CLOCKTICKS
> >   N3       30        72117133854      UNC_CHA_CLOCKTICKS
> >   N3        4         8814012840      UNC_M_CLOCKTICKS
> >
> >          1.001574118 seconds time elapsed
> >
> > [1] lore.kernel.org/20250515181417.491401-1-irogers@google.com
> >
> > Signed-off-by: Chun-Tse Shao <ctshao@google.com>
>
> LGTM.
>
> Reviewed-by: Zide Chen <zide.chen@intel.com>

Reviewed-by: Ian Rogers <irogers@google.com>

Thanks,
Ian

> > ---
> > v3:
> >   Fix a typo.
> >
> > v2: lore.kernel.org/20260205232220.1980168-1-ctshao@google.com
> >   Split EMR and GNR in the SNC2 IMC cpu map.
> >
> > v1: lore.kernel.org/20260108184430.1210223-1-ctshao@google.com
> >
> >  tools/perf/arch/x86/util/pmu.c | 95 ++++++++++++++++++++++------------
> >  1 file changed, 63 insertions(+), 32 deletions(-)
> >
> > diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> > index a3f96221758d..2215984349f5 100644
> > --- a/tools/perf/arch/x86/util/pmu.c
> > +++ b/tools/perf/arch/x86/util/pmu.c
> > @@ -22,20 +22,29 @@
> >  #include "util/env.h"
> >  #include "util/header.h"
> >
> > -static bool x86__is_intel_graniterapids(void)
> > +static bool x86__is_snc_supported(void)
> >  {
> > -     static bool checked_if_graniterapids;
> > -     static bool is_graniterapids;
> > +     static bool checked_if_snc_supported;
> > +     static bool is_supported;
> >
> > -     if (!checked_if_graniterapids) {
> > -             const char *graniterapids_cpuid = "GenuineIntel-6-A[DE]";
> > +     if (!checked_if_snc_supported) {
> > +
> > +             /* Emeraldrapids and Graniterapids support SNC configuration. */
> > +             static const char *const supported_cpuids[] = {
> > +                     "GenuineIntel-6-CF", /* Emeraldrapids */
> > +                     "GenuineIntel-6-A[DE]", /* Graniterapids */
> > +             };
> >               char *cpuid = get_cpuid_str((struct perf_cpu){0});
> >
> > -             is_graniterapids = cpuid && strcmp_cpuid_str(graniterapids_cpuid, cpuid) == 0;
> > +             for (size_t i = 0; i < ARRAY_SIZE(supported_cpuids); i++) {
> > +                     is_supported = cpuid && strcmp_cpuid_str(supported_cpuids[i], cpuid) == 0;
> > +                     if (is_supported)
> > +                             break;
> > +             }
> >               free(cpuid);
> > -             checked_if_graniterapids = true;
> > +             checked_if_snc_supported = true;
> >       }
> > -     return is_graniterapids;
> > +     return is_supported;
> >  }
> >
> >  static struct perf_cpu_map *read_sysfs_cpu_map(const char *sysfs_path)
> > @@ -64,6 +73,7 @@ static int snc_nodes_per_l3_cache(void)
> >                       read_sysfs_cpu_map("devices/system/cpu/cpu0/cache/index3/shared_cpu_list");
> >
> >               snc_nodes = perf_cpu_map__nr(cache_cpus) / perf_cpu_map__nr(node_cpus);
> > +
> >               perf_cpu_map__put(cache_cpus);
> >               perf_cpu_map__put(node_cpus);
> >               checked_snc = true;
> > @@ -137,23 +147,42 @@ static int uncore_imc_snc(struct perf_pmu *pmu)
> >       // Compute the IMC SNC using lookup tables.
> >       unsigned int imc_num;
> >       int snc_nodes = snc_nodes_per_l3_cache();
> > -     const u8 snc2_map[] = {1, 1, 0, 0, 1, 1, 0, 0};
> > -     const u8 snc3_map[] = {1, 1, 0, 0, 2, 2, 1, 1, 0, 0, 2, 2};
> > -     const u8 *snc_map;
> > -     size_t snc_map_len;
> > -
> > -     switch (snc_nodes) {
> > -     case 2:
> > -             snc_map = snc2_map;
> > -             snc_map_len = ARRAY_SIZE(snc2_map);
> > -             break;
> > -     case 3:
> > -             snc_map = snc3_map;
> > -             snc_map_len = ARRAY_SIZE(snc3_map);
> > -             break;
> > -     default:
> > -             /* Error or no lookup support for SNC with >3 nodes. */
> > -             return 0;
> > +     char *cpuid;
> > +     static const u8 emr_snc2_map[] = { 0, 0, 1, 1 };
> > +     static const u8 gnr_snc2_map[] = { 1, 1, 0, 0 };
> > +     static const u8 snc3_map[] = { 1, 1, 0, 0, 2, 2 };
> > +     static const u8 *snc_map;
> > +     static size_t snc_map_len;
> > +
> > +     /* snc_map is not inited yet. We only look up once to avoid expensive operations. */
> > +     if (!snc_map) {
> > +             switch (snc_nodes) {
> > +             case 2:
> > +                     cpuid = get_cpuid_str((struct perf_cpu){ 0 });
> > +                     if (cpuid) {
> > +                             if (strcmp_cpuid_str("GenuineIntel-6-CF", cpuid) == 0) {
> > +                                     snc_map = emr_snc2_map;
> > +                                     snc_map_len = ARRAY_SIZE(emr_snc2_map);
> > +                             } else if (strcmp_cpuid_str("GenuineIntel-6-A[DE]", cpuid) == 0) {
> > +                                     snc_map = gnr_snc2_map;
> > +                                     snc_map_len = ARRAY_SIZE(gnr_snc2_map);
> > +                             }
> > +                             free(cpuid);
> > +                     }
> > +                     break;
> > +             case 3:
> > +                     snc_map = snc3_map;
> > +                     snc_map_len = ARRAY_SIZE(snc3_map);
> > +                     break;
> > +             default:
> > +                     /* Error or no lookup support for SNC with >3 nodes. */
> > +                     return 0;
> > +             }
> > +
> > +             if (!snc_map) {
> > +                     pr_warning("Unexpected: can not find snc map config");
> > +                     return 0;
> > +             }
> >       }
> >
> >       /* Compute SNC for PMU. */
> > @@ -161,11 +190,12 @@ static int uncore_imc_snc(struct perf_pmu *pmu)
> >               pr_warning("Unexpected: unable to compute IMC number '%s'\n", pmu->name);
> >               return 0;
> >       }
> > -     if (imc_num >= snc_map_len) {
> > +     if (imc_num >= snc_map_len * perf_cpu_map__nr(pmu->cpus)) {
> >               pr_warning("Unexpected IMC %d for SNC%d mapping\n", imc_num, snc_nodes);
> >               return 0;
> >       }
> > -     return snc_map[imc_num];
> > +
> > +     return snc_map[imc_num % snc_map_len];
> >  }
> >
> >  static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
> > @@ -205,7 +235,7 @@ static int uncore_cha_imc_compute_cpu_adjust(int pmu_snc)
> >       return cpu_adjust[pmu_snc];
> >  }
> >
> > -static void gnr_uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
> > +static void uncore_cha_imc_adjust_cpumask_for_snc(struct perf_pmu *pmu, bool cha)
> >  {
> >       // With sub-NUMA clustering (SNC) there is a NUMA node per SNC in the
> >       // topology. For example, a two socket graniterapids machine may be set
> > @@ -304,11 +334,12 @@ void perf_pmu__arch_init(struct perf_pmu *pmu)
> >                               pmu->mem_events = perf_mem_events_intel_aux;
> >                       else
> >                               pmu->mem_events = perf_mem_events_intel;
> > -             } else if (x86__is_intel_graniterapids()) {
> > +             } else if (x86__is_snc_supported()) {
> >                       if (starts_with(pmu->name, "uncore_cha_"))
> > -                             gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
> > -                     else if (starts_with(pmu->name, "uncore_imc_"))
> > -                             gnr_uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
> > +                             uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/true);
> > +                     else if (starts_with(pmu->name, "uncore_imc_") &&
> > +                              !starts_with(pmu->name, "uncore_imc_free_running"))
> > +                             uncore_cha_imc_adjust_cpumask_for_snc(pmu, /*cha=*/false);
> >               }
> >       }
> >  }
> > --
> > 2.53.0.273.g2a3d683680-goog
> >
> >
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Emeraldrapids
  2026-04-01 16:38   ` Ian Rogers
@ 2026-04-02  0:07     ` Namhyung Kim
  2026-04-02 20:54       ` Chun-Tse Shao
  0 siblings, 1 reply; 5+ messages in thread
From: Namhyung Kim @ 2026-04-02  0:07 UTC (permalink / raw)
  To: Ian Rogers
  Cc: Chen, Zide, Chun-Tse Shao, linux-kernel, peterz, mingo, acme,
	mark.rutland, alexander.shishkin, jolsa, adrian.hunter,
	james.clark, ravi.bangoria, linux-perf-users

On Wed, Apr 01, 2026 at 09:38:24AM -0700, Ian Rogers wrote:
> On Thu, Feb 12, 2026 at 4:08 PM Chen, Zide <zide.chen@intel.com> wrote:
> >
> > On 2/12/2026 2:39 PM, Chun-Tse Shao wrote:
> > > Similar to GNR [1], Emeraldrapids supports sub-NUMA clusters as well.
> > > Adjust cpumasks as the logic for GNR in [1].
> > >
> > > Tested on Emeraldrapids with SNC2 enabled:
> > >   $ perf stat --per-node -e 'UNC_CHA_CLOCKTICKS,UNC_M_CLOCKTICKS' -a -- sleep 1
> > >
> > >    Performance counter stats for 'system wide':
> > >
> > >   N0       30        72125876670      UNC_CHA_CLOCKTICKS
> > >   N0        4         8815163648      UNC_M_CLOCKTICKS
> > >   N1       30        72124958844      UNC_CHA_CLOCKTICKS
> > >   N1        4         8815014974      UNC_M_CLOCKTICKS
> > >   N2       30        72121049022      UNC_CHA_CLOCKTICKS
> > >   N2        4         8814592626      UNC_M_CLOCKTICKS
> > >   N3       30        72117133854      UNC_CHA_CLOCKTICKS
> > >   N3        4         8814012840      UNC_M_CLOCKTICKS
> > >
> > >          1.001574118 seconds time elapsed
> > >
> > > [1] lore.kernel.org/20250515181417.491401-1-irogers@google.com
> > >
> > > Signed-off-by: Chun-Tse Shao <ctshao@google.com>
> >
> > LGTM.
> >
> > Reviewed-by: Zide Chen <zide.chen@intel.com>
> 
> Reviewed-by: Ian Rogers <irogers@google.com>

It doesn't apply cleanly anymore.  CT, can you please rebase?

Thanks,
Namhyung


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Emeraldrapids
  2026-04-02  0:07     ` Namhyung Kim
@ 2026-04-02 20:54       ` Chun-Tse Shao
  0 siblings, 0 replies; 5+ messages in thread
From: Chun-Tse Shao @ 2026-04-02 20:54 UTC (permalink / raw)
  To: Namhyung Kim
  Cc: Ian Rogers, Chen, Zide, linux-kernel, peterz, mingo, acme,
	mark.rutland, alexander.shishkin, jolsa, adrian.hunter,
	james.clark, ravi.bangoria, linux-perf-users

Please check patch v4 for rebasing.

https://lore.kernel.org/20260402205300.1953706-1-ctshao@google.com

Thanks,
CT

On Wed, Apr 1, 2026 at 5:08 PM Namhyung Kim <namhyung@kernel.org> wrote:
>
> On Wed, Apr 01, 2026 at 09:38:24AM -0700, Ian Rogers wrote:
> > On Thu, Feb 12, 2026 at 4:08 PM Chen, Zide <zide.chen@intel.com> wrote:
> > >
> > > On 2/12/2026 2:39 PM, Chun-Tse Shao wrote:
> > > > Similar to GNR [1], Emeraldrapids supports sub-NUMA clusters as well.
> > > > Adjust cpumasks as the logic for GNR in [1].
> > > >
> > > > Tested on Emeraldrapids with SNC2 enabled:
> > > >   $ perf stat --per-node -e 'UNC_CHA_CLOCKTICKS,UNC_M_CLOCKTICKS' -a -- sleep 1
> > > >
> > > >    Performance counter stats for 'system wide':
> > > >
> > > >   N0       30        72125876670      UNC_CHA_CLOCKTICKS
> > > >   N0        4         8815163648      UNC_M_CLOCKTICKS
> > > >   N1       30        72124958844      UNC_CHA_CLOCKTICKS
> > > >   N1        4         8815014974      UNC_M_CLOCKTICKS
> > > >   N2       30        72121049022      UNC_CHA_CLOCKTICKS
> > > >   N2        4         8814592626      UNC_M_CLOCKTICKS
> > > >   N3       30        72117133854      UNC_CHA_CLOCKTICKS
> > > >   N3        4         8814012840      UNC_M_CLOCKTICKS
> > > >
> > > >          1.001574118 seconds time elapsed
> > > >
> > > > [1] lore.kernel.org/20250515181417.491401-1-irogers@google.com
> > > >
> > > > Signed-off-by: Chun-Tse Shao <ctshao@google.com>
> > >
> > > LGTM.
> > >
> > > Reviewed-by: Zide Chen <zide.chen@intel.com>
> >
> > Reviewed-by: Ian Rogers <irogers@google.com>
>
> It doesn't apply cleanly anymore.  CT, can you please rebase?
>
> Thanks,
> Namhyung
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-04-02 20:54 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-12 22:39 [PATCH v3] perf pmu intel: Adjust cpumasks for sub-NUMA clusters on Emeraldrapids Chun-Tse Shao
2026-02-13  0:08 ` Chen, Zide
2026-04-01 16:38   ` Ian Rogers
2026-04-02  0:07     ` Namhyung Kim
2026-04-02 20:54       ` Chun-Tse Shao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox