From: Dapeng Mi <dapeng1.mi@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Ian Rogers <irogers@google.com>,
Adrian Hunter <adrian.hunter@intel.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Kan Liang <kan.liang@linux.intel.com>,
Andi Kleen <ak@linux.intel.com>,
Eranian Stephane <eranian@google.com>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
Dapeng Mi <dapeng1.mi@intel.com>,
Dapeng Mi <dapeng1.mi@linux.intel.com>
Subject: [Patch v3 19/22] perf tools: Enhance arch__intr/user_reg_mask() helpers
Date: Tue, 15 Apr 2025 11:44:25 +0000 [thread overview]
Message-ID: <20250415114428.341182-20-dapeng1.mi@linux.intel.com> (raw)
In-Reply-To: <20250415114428.341182-1-dapeng1.mi@linux.intel.com>
Arch-PEBS supports to capture more higher-width vector registers, like
YMM/ZMM registers, while the return value "uint64_t" of these 2 helpers
is not enough to represent these new added registors. Thus enhance these
two helpers by passing a "unsigned long" pointer, so these two helpers
can return more bits via this pointer.
Currently only sample_intr_regs supports these new added vector
registers, but change arch__user_reg_mask() for the sake of consistency
as well.
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
tools/perf/arch/arm/util/perf_regs.c | 8 ++++----
tools/perf/arch/arm64/util/perf_regs.c | 11 ++++++-----
tools/perf/arch/csky/util/perf_regs.c | 8 ++++----
tools/perf/arch/loongarch/util/perf_regs.c | 8 ++++----
tools/perf/arch/mips/util/perf_regs.c | 8 ++++----
tools/perf/arch/powerpc/util/perf_regs.c | 17 +++++++++--------
tools/perf/arch/riscv/util/perf_regs.c | 8 ++++----
tools/perf/arch/s390/util/perf_regs.c | 8 ++++----
tools/perf/arch/x86/util/perf_regs.c | 13 +++++++------
tools/perf/util/evsel.c | 6 ++++--
tools/perf/util/parse-regs-options.c | 6 +++---
tools/perf/util/perf_regs.c | 8 ++++----
tools/perf/util/perf_regs.h | 4 ++--
13 files changed, 59 insertions(+), 54 deletions(-)
diff --git a/tools/perf/arch/arm/util/perf_regs.c b/tools/perf/arch/arm/util/perf_regs.c
index f94a0210c7b7..14f18d518c96 100644
--- a/tools/perf/arch/arm/util/perf_regs.c
+++ b/tools/perf/arch/arm/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/arm64/util/perf_regs.c b/tools/perf/arch/arm64/util/perf_regs.c
index 09308665e28a..9bcf4755290c 100644
--- a/tools/perf/arch/arm64/util/perf_regs.c
+++ b/tools/perf/arch/arm64/util/perf_regs.c
@@ -140,12 +140,12 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
@@ -170,10 +170,11 @@ uint64_t arch__user_reg_mask(void)
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
if (fd != -1) {
close(fd);
- return attr.sample_regs_user;
+ *(uint64_t *)mask = attr.sample_regs_user;
+ return;
}
}
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/csky/util/perf_regs.c b/tools/perf/arch/csky/util/perf_regs.c
index 6b1665f41180..56c84fc91aff 100644
--- a/tools/perf/arch/csky/util/perf_regs.c
+++ b/tools/perf/arch/csky/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/loongarch/util/perf_regs.c b/tools/perf/arch/loongarch/util/perf_regs.c
index f94a0210c7b7..14f18d518c96 100644
--- a/tools/perf/arch/loongarch/util/perf_regs.c
+++ b/tools/perf/arch/loongarch/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/mips/util/perf_regs.c b/tools/perf/arch/mips/util/perf_regs.c
index 6b1665f41180..56c84fc91aff 100644
--- a/tools/perf/arch/mips/util/perf_regs.c
+++ b/tools/perf/arch/mips/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index bd36cfd420a2..e5d042305030 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -187,7 +187,7 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
@@ -199,7 +199,7 @@ uint64_t arch__intr_reg_mask(void)
};
int fd;
u32 version;
- u64 extended_mask = 0, mask = PERF_REGS_MASK;
+ u64 extended_mask = 0;
/*
* Get the PVR value to set the extended
@@ -210,8 +210,10 @@ uint64_t arch__intr_reg_mask(void)
extended_mask = PERF_REG_PMU_MASK_300;
else if ((version == PVR_POWER10) || (version == PVR_POWER11))
extended_mask = PERF_REG_PMU_MASK_31;
- else
- return mask;
+ else {
+ *(u64 *)mask = PERF_REGS_MASK;
+ return;
+ }
attr.sample_regs_intr = extended_mask;
attr.sample_period = 1;
@@ -224,14 +226,13 @@ uint64_t arch__intr_reg_mask(void)
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
if (fd != -1) {
close(fd);
- mask |= extended_mask;
+ *(u64 *)mask = PERF_REGS_MASK | extended_mask;
}
- return mask;
}
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/riscv/util/perf_regs.c b/tools/perf/arch/riscv/util/perf_regs.c
index 6b1665f41180..56c84fc91aff 100644
--- a/tools/perf/arch/riscv/util/perf_regs.c
+++ b/tools/perf/arch/riscv/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/s390/util/perf_regs.c b/tools/perf/arch/s390/util/perf_regs.c
index 6b1665f41180..56c84fc91aff 100644
--- a/tools/perf/arch/s390/util/perf_regs.c
+++ b/tools/perf/arch/s390/util/perf_regs.c
@@ -6,14 +6,14 @@ static const struct sample_reg sample_reg_masks[] = {
SMPL_REG_END
};
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
const struct sample_reg *arch__sample_reg_masks(void)
diff --git a/tools/perf/arch/x86/util/perf_regs.c b/tools/perf/arch/x86/util/perf_regs.c
index 9f492568f3b4..5b163f0a651a 100644
--- a/tools/perf/arch/x86/util/perf_regs.c
+++ b/tools/perf/arch/x86/util/perf_regs.c
@@ -283,7 +283,7 @@ const struct sample_reg *arch__sample_reg_masks(void)
return sample_reg_masks;
}
-uint64_t arch__intr_reg_mask(void)
+void arch__intr_reg_mask(unsigned long *mask)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
@@ -295,6 +295,9 @@ uint64_t arch__intr_reg_mask(void)
.exclude_kernel = 1,
};
int fd;
+
+ *(u64 *)mask = PERF_REGS_MASK;
+
/*
* In an unnamed union, init it here to build on older gcc versions
*/
@@ -320,13 +323,11 @@ uint64_t arch__intr_reg_mask(void)
fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
if (fd != -1) {
close(fd);
- return (PERF_REG_EXTENDED_MASK | PERF_REGS_MASK);
+ *(u64 *)mask = PERF_REG_EXTENDED_MASK | PERF_REGS_MASK;
}
-
- return PERF_REGS_MASK;
}
-uint64_t arch__user_reg_mask(void)
+void arch__user_reg_mask(unsigned long *mask)
{
- return PERF_REGS_MASK;
+ *(uint64_t *)mask = PERF_REGS_MASK;
}
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1974395492d7..6e71187d6a93 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1056,17 +1056,19 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o
if (param->record_mode == CALLCHAIN_DWARF) {
if (!function) {
const char *arch = perf_env__arch(evsel__env(evsel));
+ uint64_t mask = 0;
+ arch__user_reg_mask((unsigned long *)&mask);
evsel__set_sample_bit(evsel, REGS_USER);
evsel__set_sample_bit(evsel, STACK_USER);
if (opts->sample_user_regs &&
- DWARF_MINIMAL_REGS(arch) != arch__user_reg_mask()) {
+ DWARF_MINIMAL_REGS(arch) != mask) {
attr->sample_regs_user |= DWARF_MINIMAL_REGS(arch);
pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
"specifying a subset with --user-regs may render DWARF unwinding unreliable, "
"so the minimal registers set (IP, SP) is explicitly forced.\n");
} else {
- attr->sample_regs_user |= arch__user_reg_mask();
+ attr->sample_regs_user |= mask;
}
attr->sample_stack_user = param->dump_size;
attr->exclude_callchain_user = 1;
diff --git a/tools/perf/util/parse-regs-options.c b/tools/perf/util/parse-regs-options.c
index cda1c620968e..3dcd8dc4f81b 100644
--- a/tools/perf/util/parse-regs-options.c
+++ b/tools/perf/util/parse-regs-options.c
@@ -16,7 +16,7 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
const struct sample_reg *r = NULL;
char *s, *os = NULL, *p;
int ret = -1;
- uint64_t mask;
+ uint64_t mask = 0;
if (unset)
return 0;
@@ -28,9 +28,9 @@ __parse_regs(const struct option *opt, const char *str, int unset, bool intr)
return -1;
if (intr)
- mask = arch__intr_reg_mask();
+ arch__intr_reg_mask((unsigned long *)&mask);
else
- mask = arch__user_reg_mask();
+ arch__user_reg_mask((unsigned long *)&mask);
/* str may be NULL in case no arg is passed to -I */
if (str) {
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 44b90bbf2d07..7a96290fd1e6 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -11,14 +11,14 @@ int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
return SDT_ARG_SKIP;
}
-uint64_t __weak arch__intr_reg_mask(void)
+void __weak arch__intr_reg_mask(unsigned long *mask)
{
- return 0;
+ *(uint64_t *)mask = 0;
}
-uint64_t __weak arch__user_reg_mask(void)
+void __weak arch__user_reg_mask(unsigned long *mask)
{
- return 0;
+ *(uint64_t *)mask = 0;
}
static const struct sample_reg sample_reg_masks[] = {
diff --git a/tools/perf/util/perf_regs.h b/tools/perf/util/perf_regs.h
index f2d0736d65cc..316d280e5cd7 100644
--- a/tools/perf/util/perf_regs.h
+++ b/tools/perf/util/perf_regs.h
@@ -24,8 +24,8 @@ enum {
};
int arch_sdt_arg_parse_op(char *old_op, char **new_op);
-uint64_t arch__intr_reg_mask(void);
-uint64_t arch__user_reg_mask(void);
+void arch__intr_reg_mask(unsigned long *mask);
+void arch__user_reg_mask(unsigned long *mask);
const struct sample_reg *arch__sample_reg_masks(void);
const char *perf_reg_name(int id, const char *arch);
--
2.40.1
next prev parent reply other threads:[~2025-04-15 8:24 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-04-15 11:44 [Patch v3 00/22] Arch-PEBS and PMU supports for Clearwater Forest and Panther Lake Dapeng Mi
2025-04-15 11:44 ` [Patch v3 01/22] perf/x86/intel: Add Panther Lake support Dapeng Mi
2025-04-17 13:01 ` [tip: perf/core] " tip-bot2 for Kan Liang
2025-04-15 11:44 ` [Patch v3 02/22] perf/x86/intel: Add PMU support for Clearwater Forest Dapeng Mi
2025-04-17 13:01 ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 03/22] perf/x86/intel: Parse CPUID archPerfmonExt leaves for non-hybrid CPUs Dapeng Mi
2025-04-17 13:01 ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 04/22] perf/x86/intel: Decouple BTS initialization from PEBS initialization Dapeng Mi
2025-04-17 13:01 ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 05/22] perf/x86/intel: Rename x86_pmu.pebs to x86_pmu.ds_pebs Dapeng Mi
2025-04-17 13:01 ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 06/22] perf/x86/intel: Introduce pairs of PEBS static calls Dapeng Mi
2025-04-17 13:00 ` [tip: perf/core] " tip-bot2 for Dapeng Mi
2025-04-15 11:44 ` [Patch v3 07/22] perf/x86/intel: Initialize architectural PEBS Dapeng Mi
2025-04-15 11:44 ` [Patch v3 08/22] perf/x86/intel/ds: Factor out PEBS record processing code to functions Dapeng Mi
2025-04-15 11:44 ` [Patch v3 09/22] perf/x86/intel/ds: Factor out PEBS group " Dapeng Mi
2025-04-15 11:44 ` [Patch v3 10/22] perf/x86/intel: Process arch-PEBS records or record fragments Dapeng Mi
2025-04-15 13:57 ` Peter Zijlstra
2025-04-15 16:09 ` Liang, Kan
2025-04-15 11:44 ` [Patch v3 11/22] perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR Dapeng Mi
2025-04-15 13:45 ` Peter Zijlstra
2025-04-16 0:59 ` Mi, Dapeng
2025-04-15 13:48 ` Peter Zijlstra
2025-04-16 1:03 ` Mi, Dapeng
2025-04-15 11:44 ` [Patch v3 12/22] perf/x86/intel: Update dyn_constranit base on PEBS event precise level Dapeng Mi
2025-04-15 13:53 ` Peter Zijlstra
2025-04-15 16:31 ` Liang, Kan
2025-04-16 1:46 ` Mi, Dapeng
2025-04-16 13:59 ` Liang, Kan
2025-04-17 1:15 ` Mi, Dapeng
2025-04-16 15:32 ` Peter Zijlstra
2025-04-16 19:45 ` Liang, Kan
2025-04-16 19:56 ` Peter Zijlstra
2025-04-22 22:50 ` Liang, Kan
2025-04-15 11:44 ` [Patch v3 13/22] perf/x86/intel: Setup PEBS data configuration and enable legacy groups Dapeng Mi
2025-04-15 11:44 ` [Patch v3 14/22] perf/x86/intel: Add counter group support for arch-PEBS Dapeng Mi
2025-04-15 11:44 ` [Patch v3 15/22] perf/x86/intel: Support SSP register capturing " Dapeng Mi
2025-04-15 14:07 ` Peter Zijlstra
2025-04-16 5:49 ` Mi, Dapeng
2025-04-15 11:44 ` [Patch v3 16/22] perf/core: Support to capture higher width vector registers Dapeng Mi
2025-04-15 14:36 ` Peter Zijlstra
2025-04-16 6:42 ` Mi, Dapeng
2025-04-16 15:53 ` Peter Zijlstra
2025-04-17 2:00 ` Mi, Dapeng
2025-04-22 3:05 ` Mi, Dapeng
2025-04-15 11:44 ` [Patch v3 17/22] perf/x86/intel: Support arch-PEBS vector registers group capturing Dapeng Mi
2025-04-15 11:44 ` [Patch v3 18/22] perf tools: Support to show SSP register Dapeng Mi
2025-04-15 11:44 ` Dapeng Mi [this message]
2025-04-15 11:44 ` [Patch v3 20/22] perf tools: Enhance sample_regs_user/intr to capture more registers Dapeng Mi
2025-04-15 11:44 ` [Patch v3 21/22] perf tools: Support to capture more vector registers (x86/Intel) Dapeng Mi
2025-04-15 11:44 ` [Patch v3 22/22] perf tools/tests: Add vector registers PEBS sampling test Dapeng Mi
2025-04-15 15:21 ` [Patch v3 00/22] Arch-PEBS and PMU supports for Clearwater Forest and Panther Lake Liang, Kan
2025-04-16 7:42 ` Peter Zijlstra
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250415114428.341182-20-dapeng1.mi@linux.intel.com \
--to=dapeng1.mi@linux.intel.com \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=dapeng1.mi@intel.com \
--cc=eranian@google.com \
--cc=irogers@google.com \
--cc=kan.liang@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.