public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Dapeng Mi <dapeng1.mi@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Kan Liang <kan.liang@linux.intel.com>,
	Andi Kleen <ak@linux.intel.com>,
	Eranian Stephane <eranian@google.com>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Dapeng Mi <dapeng1.mi@intel.com>,
	Dapeng Mi <dapeng1.mi@linux.intel.com>
Subject: [Patch v3 19/22] perf tools: Enhance arch__intr/user_reg_mask() helpers
Date: Tue, 15 Apr 2025 11:44:25 +0000	[thread overview]
Message-ID: <20250415114428.341182-20-dapeng1.mi@linux.intel.com> (raw)
In-Reply-To: <20250415114428.341182-1-dapeng1.mi@linux.intel.com>

Arch-PEBS supports capturing wider vector registers, such as the
YMM/ZMM registers, but the "uint64_t" return value of these two helpers
is not wide enough to represent these newly added registers. Thus
enhance these two helpers to take an "unsigned long" pointer, so that
they can return more bits via this pointer.

Currently only sample_intr_regs supports these newly added vector
registers, but change arch__user_reg_mask() as well for the sake of
consistency.

Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 tools/perf/arch/arm/util/perf_regs.c       |  8 ++++----
 tools/perf/arch/arm64/util/perf_regs.c     | 11 ++++++-----
 tools/perf/arch/csky/util/perf_regs.c      |  8 ++++----
 tools/perf/arch/loongarch/util/perf_regs.c |  8 ++++----
 tools/perf/arch/mips/util/perf_regs.c      |  8 ++++----
 tools/perf/arch/powerpc/util/perf_regs.c   | 17 +++++++++--------
 tools/perf/arch/riscv/util/perf_regs.c     |  8 ++++----
 tools/perf/arch/s390/util/perf_regs.c      |  8 ++++----
 tools/perf/arch/x86/util/perf_regs.c       | 13 +++++++------
 tools/perf/util/evsel.c                    |  6 ++++--
 tools/perf/util/parse-regs-options.c       |  6 +++---
 tools/perf/util/perf_regs.c                |  8 ++++----
 tools/perf/util/perf_regs.h                |  4 ++--
 13 files changed, 59 insertions(+), 54 deletions(-)

diff --git a/tools/perf/arch/arm/util/perf_regs.c b/tools/perf/arch/arm/util/perf_regs.c
index f94a0210c7b7..14f18d518c96 100644
--- a/tools/perf/arch/arm/util/perf_regs.c
+++ b/tools/perf/arch/arm/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG_END
 };
 
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
 const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
index 09308665e28a..9bcf4755290c 100644
--- a/tools/perf/arch/arm64/util/perf_regs.c
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -140,12 +140,12 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
 	return SDT_ARG_VALID;
 }
 
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
 {
 	struct perf_event_attr attr = {
 		.type                   = PERF_TYPE_HARDWARE,
@@ -170,10 +170,11 @@ uint64_t arch__user_reg_mask(void)
 		fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
 		if (fd != -1) {
 			close(fd);
-			return attr.sample_regs_user;
+			*(uint64_t *)mask = attr.sample_regs_user;
+			return;
 		}
 	}
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
 const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/csky/util/perf_regs.c b/tools/perf/arch/csky/util/perf_regs.c
index 6b1665f41180..56c84fc91aff 100644
--- a/tools/perf/arch/csky/util/perf_regs.c
+++ b/tools/perf/arch/csky/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG_END
 };
 
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
 const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/loongarch/util/perf_regs.c b/tools/perf/arch/loongarch/util/perf_regs.c
index f94a0210c7b7..14f18d518c96 100644
--- a/tools/perf/arch/loongarch/util/perf_regs.c
+++ b/tools/perf/arch/loongarch/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG_END
 };
 
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
 const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/mips/util/perf_regs.c b/tools/perf/arch/mips/util/perf_regs.c
index 6b1665f41180..56c84fc91aff 100644
--- a/tools/perf/arch/mips/util/perf_regs.c
+++ b/tools/perf/arch/mips/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG_END
 };
 
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
 const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index bd36cfd420a2..e5d042305030 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -187,7 +187,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
 	return SDT_ARG_VALID;
 }
 
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
 {
 	struct perf_event_attr attr = {
 		.type                   = PERF_TYPE_HARDWARE,
@@ -199,7 +199,7 @@ uint64_t arch__intr_reg_mask(void)
 	};
 	int fd;
 	u32 version;
-	u64 extended_mask = 0, mask = PERF_REGS_MASK;
+	u64 extended_mask = 0;
 
 	/*
 	 * Get the PVR value to set the extended
@@ -210,8 +210,10 @@ uint64_t arch__intr_reg_mask(void)
 		extended_mask = PERF_REG_PMU_MASK_300;
 	else if ((version == PVR_POWER10) || (version == PVR_POWER11))
 		extended_mask = PERF_REG_PMU_MASK_31;
-	else
-		return mask;
+	else {
+		*(u64 *)mask = PERF_REGS_MASK;
+		return;
+	}
 
 	attr.sample_regs_intr = extended_mask;
 	attr.sample_period = 1;
@@ -224,14 +226,13 @@ uint64_t arch__intr_reg_mask(void)
 	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
 	if (fd != -1) {
 		close(fd);
-		mask |= extended_mask;
+		*(u64 *)mask = PERF_REGS_MASK | extended_mask;
 	}
-	return mask;
 }
 
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
 const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/riscv/util/perf_regs.c b/tools/perf/arch/riscv/util/perf_regs.c
index 6b1665f41180..56c84fc91aff 100644
--- a/tools/perf/arch/riscv/util/perf_regs.c
+++ b/tools/perf/arch/riscv/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG_END
 };
 
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
 const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/s390/util/perf_regs.c b/tools/perf/arch/s390/util/perf_regs.c
index 6b1665f41180..56c84fc91aff 100644
--- a/tools/perf/arch/s390/util/perf_regs.c
+++ b/tools/perf/arch/s390/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
 	SMPL_REG_END
 };
 
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
 
 const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 9f492568f3b4..5b163f0a651a 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -283,7 +283,7 @@ const struct sample_reg *arch__sample_reg_masks(void)
 	return sample_reg_masks;
 }
 
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
 {
 	struct perf_event_attr attr = {
 		.type			= PERF_TYPE_HARDWARE,
@@ -295,6 +295,9 @@ uint64_t arch__intr_reg_mask(void)
 		.exclude_kernel		= 1,
 	};
 	int fd;
+
+	*(u64 *)mask = PERF_REGS_MASK;
+
 	/*
 	 * In an unnamed union, init it here to build on older gcc versions
 	 */
@@ -320,13 +323,11 @@ uint64_t arch__intr_reg_mask(void)
 	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
 	if (fd != -1) {
 		close(fd);
-		return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK);
+		*(u64 *)mask = PERF_REG_EXTENDED_MASK | PERF_REGS_MASK;
 	}
-
-	return PERF_REGS_MASK;
 }
 
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
 {
-	return PERF_REGS_MASK;
+	*(uint64_t *)mask = PERF_REGS_MASK;
 }
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1974395492d7..6e71187d6a93 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1056,17 +1056,19 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
 	if (param->record_mode == CALLCHAIN_DWARF) {
 		if (!function) {
 			const char *arch = perf_env__arch(evsel__env(evsel));
+			uint64_t mask = 0;
 
+			arch__user_reg_mask((unsigned long *)&mask);
 			evsel__set_sample_bit(evsel, REGS_USER);
 			evsel__set_sample_bit(evsel, STACK_USER);
 			if (opts->sample_user_regs &&
-			    DWARF_MINIMAL_REGS(arch) != arch__user_reg_mask()) {
+			    DWARF_MINIMAL_REGS(arch) != mask) {
 				attr->sample_regs_user |= DWARF_MINIMAL_REGS(arch);
 				pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
 					   "specifying a subset with --user-regs may render DWARF unwinding unreliable, "
 					   "so the minimal registers set (IP, SP) is explicitly forced.\n");
 			} else {
-				attr->sample_regs_user |= arch__user_reg_mask();
+				attr->sample_regs_user |= mask;
 			}
 			attr->sample_stack_user = param->dump_size;
 			attr->exclude_callchain_user = 1;
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
index cda1c620968e..3dcd8dc4f81b 100644
--- a/tools/perf/util/parse-regs-options.c
+++ b/tools/perf/util/parse-regs-options.c
@@ -16,7 +16,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
 	const struct sample_reg *r = NULL;
 	char *s, *os = NULL, *p;
 	int ret = -1;
-	uint64_t mask;
+	uint64_t mask = 0;
 
 	if (unset)
 		return 0;
@@ -28,9 +28,9 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
 		return -1;
 
 	if (intr)
-		mask = arch__intr_reg_mask();
+		arch__intr_reg_mask((unsigned long *)&mask);
 	else
-		mask = arch__user_reg_mask();
+		arch__user_reg_mask((unsigned long *)&mask);
 
 	/* str may be NULL in case no arg is passed to -I */
 	if (str) {
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 44b90bbf2d07..7a96290fd1e6 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -11,14 +11,14 @@ int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
 	return SDT_ARG_SKIP;
 }
 
-uint64_t __weak arch__intr_reg_mask(void)
+void __weak arch__intr_reg_mask(unsigned long *mask)
 {
-	return 0;
+	*(uint64_t *)mask = 0;
 }
 
-uint64_t __weak arch__user_reg_mask(void)
+void __weak arch__user_reg_mask(unsigned long *mask)
 {
-	return 0;
+	*(uint64_t *)mask = 0;
 }
 
 static const struct sample_reg sample_reg_masks[] = {
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index f2d0736d65cc..316d280e5cd7 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -24,8 +24,8 @@ enum {
 };
 
 int arch_sdt_arg_parse_op(char *old_op, char **new_op);
-uint64_t arch__intr_reg_mask(void);
-uint64_t arch__user_reg_mask(void);
+void arch__intr_reg_mask(unsigned long *mask);
+void arch__user_reg_mask(unsigned long *mask);
 const struct sample_reg *arch__sample_reg_masks(void);
 
 const char *perf_reg_name(int id, const char *arch);
-- 
2.40.1


  parent reply	other threads:[~2025-04-15  8:24 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-15 11:44 [Patch v3 00/22] Arch-PEBS and PMU supports for Clearwater Forest and Panther Lake Dapeng Mi
2025-04-15 11:44 ` [Patch v3 01/22] perf/x86/intel: Add Panther Lake support Dapeng Mi
2025-04-17 13:01   ` [tip: perf/core] " tip-bot2 for Kan Liang
2025-04-15 11:44 ` [Patch v3 02/22] perf/x86/intel: Add PMU support for Clearwater Forest Dapeng Mi
2025-04-17 13:01   ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 03/22] perf/x86/intel: Parse CPUID archPerfmonExt leaves for non-hybrid CPUs Dapeng Mi
2025-04-17 13:01   ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 04/22] perf/x86/intel: Decouple BTS initialization from PEBS initialization Dapeng Mi
2025-04-17 13:01   ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 05/22] perf/x86/intel: Rename x86_pmu.pebs to x86_pmu.ds_pebs Dapeng Mi
2025-04-17 13:01   ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 06/22] perf/x86/intel: Introduce pairs of PEBS static calls Dapeng Mi
2025-04-17 13:00   ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 07/22] perf/x86/intel: Initialize architectural PEBS Dapeng Mi
2025-04-15 11:44 ` [Patch v3 08/22] perf/x86/intel/ds: Factor out PEBS record processing code to functions Dapeng Mi
2025-04-15 11:44 ` [Patch v3 09/22] perf/x86/intel/ds: Factor out PEBS group " Dapeng Mi
2025-04-15 11:44 ` [Patch v3 10/22] perf/x86/intel: Process arch-PEBS records or record fragments Dapeng Mi
2025-04-15 13:57   ` Peter Zijlstra
2025-04-15 16:09     ` Liang, Kan
2025-04-15 11:44 ` [Patch v3 11/22] perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR Dapeng Mi
2025-04-15 13:45   ` Peter Zijlstra
2025-04-16  0:59     ` Mi, Dapeng
2025-04-15 13:48   ` Peter Zijlstra
2025-04-16  1:03     ` Mi, Dapeng
2025-04-15 11:44 ` [Patch v3 12/22] perf/x86/intel: Update dyn_constranit base on PEBS event precise level Dapeng Mi
2025-04-15 13:53   ` Peter Zijlstra
2025-04-15 16:31     ` Liang, Kan
2025-04-16  1:46       ` Mi, Dapeng
2025-04-16 13:59         ` Liang, Kan
2025-04-17  1:15           ` Mi, Dapeng
2025-04-16 15:32       ` Peter Zijlstra
2025-04-16 19:45         ` Liang, Kan
2025-04-16 19:56           ` Peter Zijlstra
2025-04-22 22:50             ` Liang, Kan
2025-04-15 11:44 ` [Patch v3 13/22] perf/x86/intel: Setup PEBS data configuration and enable legacy groups Dapeng Mi
2025-04-15 11:44 ` [Patch v3 14/22] perf/x86/intel: Add counter group support for arch-PEBS Dapeng Mi
2025-04-15 11:44 ` [Patch v3 15/22] perf/x86/intel: Support SSP register capturing " Dapeng Mi
2025-04-15 14:07   ` Peter Zijlstra
2025-04-16  5:49     ` Mi, Dapeng
2025-04-15 11:44 ` [Patch v3 16/22] perf/core: Support to capture higher width vector registers Dapeng Mi
2025-04-15 14:36   ` Peter Zijlstra
2025-04-16  6:42     ` Mi, Dapeng
2025-04-16 15:53       ` Peter Zijlstra
2025-04-17  2:00         ` Mi, Dapeng
2025-04-22  3:05         ` Mi, Dapeng
2025-04-15 11:44 ` [Patch v3 17/22] perf/x86/intel: Support arch-PEBS vector registers group capturing Dapeng Mi
2025-04-15 11:44 ` [Patch v3 18/22] perf tools: Support to show SSP register Dapeng Mi
2025-04-15 11:44 ` Dapeng Mi [this message]
2025-04-15 11:44 ` [Patch v3 20/22] perf tools: Enhance sample_regs_user/intr to capture more registers Dapeng Mi
2025-04-15 11:44 ` [Patch v3 21/22] perf tools: Support to capture more vector registers (x86/Intel) Dapeng Mi
2025-04-15 11:44 ` [Patch v3 22/22] perf tools/tests: Add vector registers PEBS sampling test Dapeng Mi
2025-04-15 15:21 ` [Patch v3 00/22] Arch-PEBS and PMU supports for Clearwater Forest and Panther Lake Liang, Kan
2025-04-16  7:42   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250415114428.341182-20-dapeng1.mi@linux.intel.com \
    --to=dapeng1.mi@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=dapeng1.mi@intel.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox