From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
namhyung@kernel.org, tglx@linutronix.de,
dave.hansen@linux.intel.com, irogers@google.com,
adrian.hunter@intel.com, jolsa@kernel.org,
alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: dapeng1.mi@linux.intel.com, ak@linux.intel.com,
zide.chen@intel.com, mark.rutland@arm.com, broonie@kernel.org,
ravi.bangoria@amd.com, eranian@google.com,
Kan Liang <kan.liang@linux.intel.com>
Subject: [PATCH V3 07/17] perf/x86: Move XMM to sample_simd_vec_regs
Date: Fri, 15 Aug 2025 14:34:25 -0700 [thread overview]
Message-ID: <20250815213435.1702022-8-kan.liang@linux.intel.com> (raw)
In-Reply-To: <20250815213435.1702022-1-kan.liang@linux.intel.com>
From: Kan Liang <kan.liang@linux.intel.com>
XMM0-15 are SIMD registers. Move them from sample_regs to
sample_simd_vec_regs. Reject access to the extended space of sample_regs
if the new sample_simd_vec_regs is used.
perf_reg_value() needs the ABI to understand the layout of sample_regs.
Add the ABI information to struct x86_perf_regs.
Implement the x86-specific perf_simd_reg_validate() to validate the SIMD
register configuration from the user tool. Only XMM0-15 are supported
for now. More registers will be added in the following patches.
Implement the x86-specific perf_simd_reg_value() to retrieve the XMM
register values.
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
arch/x86/events/core.c | 38 ++++++++++++++++-
arch/x86/events/intel/ds.c | 2 +-
arch/x86/events/perf_event.h | 12 ++++++
arch/x86/include/asm/perf_event.h | 1 +
arch/x86/include/uapi/asm/perf_regs.h | 6 +++
arch/x86/kernel/perf_regs.c | 61 ++++++++++++++++++++++++++-
6 files changed, 117 insertions(+), 3 deletions(-)
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index f27c58f4c815..1789b91c95c6 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -709,6 +709,22 @@ int x86_pmu_hw_config(struct perf_event *event)
return -EINVAL;
if (!(x86_pmu.ext_regs_mask & XFEATURE_MASK_SSE))
return -EINVAL;
+ if (event->attr.sample_simd_regs_enabled)
+ return -EINVAL;
+ }
+
+ if (event_has_simd_regs(event)) {
+ if (!(event->pmu->capabilities & PERF_PMU_CAP_SIMD_REGS))
+ return -EINVAL;
+ /* A vector width is set but no vector registers are requested */
+ if (event->attr.sample_simd_vec_reg_qwords &&
+ !event->attr.sample_simd_vec_reg_intr &&
+ !event->attr.sample_simd_vec_reg_user)
+ return -EINVAL;
+ /* The vector registers set is not supported */
+ if (event->attr.sample_simd_vec_reg_qwords >= PERF_X86_XMM_QWORDS &&
+ !(x86_pmu.ext_regs_mask & XFEATURE_MASK_SSE))
+ return -EINVAL;
}
}
return x86_setup_perfctr(event);
@@ -1784,6 +1800,16 @@ void x86_pmu_setup_regs_data(struct perf_event *event,
data->dyn_size += sizeof(u64);
if (data->regs_user.regs)
data->dyn_size += hweight64(attr->sample_regs_user) * sizeof(u64);
+ if (attr->sample_simd_regs_enabled && data->regs_user.abi) {
+ /* num and qwords of vector and pred registers */
+ data->dyn_size += sizeof(u64);
+ /* data[] */
+ data->dyn_size += hweight64(attr->sample_simd_vec_reg_user) *
+ sizeof(u64) *
+ attr->sample_simd_vec_reg_qwords;
+ data->regs_user.abi |= PERF_SAMPLE_REGS_ABI_SIMD;
+ }
+ perf_regs->abi = data->regs_user.abi;
data->sample_flags |= PERF_SAMPLE_REGS_USER;
}
@@ -1793,10 +1819,20 @@ void x86_pmu_setup_regs_data(struct perf_event *event,
data->dyn_size += sizeof(u64);
if (data->regs_intr.regs)
data->dyn_size += hweight64(attr->sample_regs_intr) * sizeof(u64);
+ if (attr->sample_simd_regs_enabled && data->regs_intr.abi) {
+ /* num and qwords of vector and pred registers */
+ data->dyn_size += sizeof(u64);
+ /* data[] */
+ data->dyn_size += hweight64(attr->sample_simd_vec_reg_intr) *
+ sizeof(u64) *
+ attr->sample_simd_vec_reg_qwords;
+ data->regs_intr.abi |= PERF_SAMPLE_REGS_ABI_SIMD;
+ }
+ perf_regs->abi = data->regs_intr.abi;
data->sample_flags |= PERF_SAMPLE_REGS_INTR;
}
- if (event_has_extended_regs(event)) {
+ if (event_needs_xmm(event)) {
perf_regs->xmm_regs = NULL;
mask |= XFEATURE_MASK_SSE;
}
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 9cdece014ac0..4887f6ea7dde 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1415,7 +1415,7 @@ static u64 pebs_update_adaptive_cfg(struct perf_event *event)
if (gprs || (attr->precise_ip < 2) || tsx_weight)
pebs_data_cfg |= PEBS_DATACFG_GP;
- if (event_has_extended_regs(event))
+ if (event_needs_xmm(event))
pebs_data_cfg |= PEBS_DATACFG_XMMS;
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 7bf24842b1dc..6f22ed718a75 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -133,6 +133,18 @@ static inline bool is_acr_event_group(struct perf_event *event)
return check_leader_group(event->group_leader, PERF_X86_EVENT_ACR);
}
+static inline bool event_needs_xmm(struct perf_event *event)
+{
+ if (event->attr.sample_simd_regs_enabled &&
+ event->attr.sample_simd_vec_reg_qwords >= PERF_X86_XMM_QWORDS)
+ return true;
+
+ if (!event->attr.sample_simd_regs_enabled &&
+ event_has_extended_regs(event))
+ return true;
+ return false;
+}
+
struct amd_nb {
int nb_id; /* NorthBridge id */
int refcnt; /* reference count */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f36f04bc95f1..538219c59979 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -592,6 +592,7 @@ extern void perf_events_lapic_init(void);
struct pt_regs;
struct x86_perf_regs {
struct pt_regs regs;
+ u64 abi;
union {
u64 *xmm_regs;
u32 *xmm_space; /* for xsaves */
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index 7c9d2bb3833b..bd8af802f757 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -55,4 +55,10 @@ enum perf_event_x86_regs {
#define PERF_REG_EXTENDED_MASK (~((1ULL << PERF_REG_X86_XMM0) - 1))
+#define PERF_X86_SIMD_VEC_REGS_MAX 16
+#define PERF_X86_SIMD_VEC_MASK GENMASK_ULL(PERF_X86_SIMD_VEC_REGS_MAX - 1, 0)
+
+#define PERF_X86_XMM_QWORDS 2
+#define PERF_X86_SIMD_QWORDS_MAX PERF_X86_XMM_QWORDS
+
#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 624703af80a1..397357c5896b 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -57,12 +57,27 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
#endif
};
+void perf_simd_reg_check(struct pt_regs *regs,
+ u64 mask, u16 *nr_vectors, u16 *vec_qwords,
+ u16 pred_mask, u16 *nr_pred, u16 *pred_qwords)
+{
+ struct x86_perf_regs *perf_regs = container_of(regs, struct x86_perf_regs, regs);
+
+ if (*vec_qwords >= PERF_X86_XMM_QWORDS && !perf_regs->xmm_regs)
+ *nr_vectors = 0;
+
+ *nr_pred = 0;
+}
+
u64 perf_reg_value(struct pt_regs *regs, int idx)
{
struct x86_perf_regs *perf_regs;
if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
perf_regs = container_of(regs, struct x86_perf_regs, regs);
+ /* SIMD registers are moved to dedicated sample_simd_vec_reg */
+ if (perf_regs->abi & PERF_SAMPLE_REGS_ABI_SIMD)
+ return 0;
if (!perf_regs->xmm_regs)
return 0;
return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
@@ -74,6 +89,49 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
return regs_get_register(regs, pt_regs_offset[idx]);
}
+u64 perf_simd_reg_value(struct pt_regs *regs, int idx,
+ u16 qwords_idx, bool pred)
+{
+ struct x86_perf_regs *perf_regs = container_of(regs, struct x86_perf_regs, regs);
+
+ if (pred)
+ return 0;
+
+ if (WARN_ON_ONCE(idx >= PERF_X86_SIMD_VEC_REGS_MAX ||
+ qwords_idx >= PERF_X86_SIMD_QWORDS_MAX))
+ return 0;
+
+ if (qwords_idx < PERF_X86_XMM_QWORDS) {
+ if (!perf_regs->xmm_regs)
+ return 0;
+ return perf_regs->xmm_regs[idx * PERF_X86_XMM_QWORDS + qwords_idx];
+ }
+
+ return 0;
+}
+
+int perf_simd_reg_validate(u16 vec_qwords, u64 vec_mask,
+ u16 pred_qwords, u32 pred_mask)
+{
+ /* pred_qwords implies sample_simd_{pred,vec}_reg_* are supported */
+ if (!pred_qwords)
+ return 0;
+
+ if (!vec_qwords) {
+ if (vec_mask)
+ return -EINVAL;
+ } else {
+ if (vec_qwords != PERF_X86_XMM_QWORDS)
+ return -EINVAL;
+ if (vec_mask & ~PERF_X86_SIMD_VEC_MASK)
+ return -EINVAL;
+ }
+ if (pred_mask)
+ return -EINVAL;
+
+ return 0;
+}
+
#define PERF_REG_X86_RESERVED (((1ULL << PERF_REG_X86_XMM0) - 1) & \
~((1ULL << PERF_REG_X86_MAX) - 1))
@@ -114,7 +172,8 @@ void perf_get_regs_user(struct perf_regs *regs_user,
int perf_reg_validate(u64 mask)
{
- if (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED)))
+ /* The mask could be 0 if only the SIMD registers are of interest */
+ if (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))
return -EINVAL;
return 0;
--
2.38.1
next prev parent reply other threads:[~2025-08-15 21:35 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-15 21:34 [PATCH V3 00/17] Support vector and more extended registers in perf kan.liang
2025-08-15 21:34 ` [PATCH V3 01/17] perf/x86: Use x86_perf_regs in the x86 nmi handler kan.liang
2025-08-15 21:34 ` [PATCH V3 02/17] perf/x86: Setup the regs data kan.liang
2025-08-15 21:34 ` [PATCH V3 03/17] x86/fpu/xstate: Add xsaves_nmi kan.liang
2025-08-15 21:34 ` [PATCH V3 04/17] perf: Move has_extended_regs() to header file kan.liang
2025-08-15 21:34 ` [PATCH V3 05/17] perf/x86: Support XMM register for non-PEBS and REGS_USER kan.liang
2025-08-19 13:39 ` Peter Zijlstra
2025-08-19 15:55 ` Liang, Kan
2025-08-20 9:46 ` Mi, Dapeng
2025-08-20 18:03 ` Liang, Kan
2025-08-21 1:00 ` Mi, Dapeng
2025-08-15 21:34 ` [PATCH V3 06/17] perf: Support SIMD registers kan.liang
2025-08-20 9:55 ` Mi, Dapeng
2025-08-20 18:08 ` Liang, Kan
2025-08-15 21:34 ` kan.liang [this message]
2025-08-15 21:34 ` [PATCH V3 08/17] perf/x86: Add YMM into sample_simd_vec_regs kan.liang
2025-08-20 9:59 ` Mi, Dapeng
2025-08-20 18:10 ` Liang, Kan
2025-08-15 21:34 ` [PATCH V3 09/17] perf/x86: Add ZMM " kan.liang
2025-08-15 21:34 ` [PATCH V3 10/17] perf/x86: Add OPMASK into sample_simd_pred_reg kan.liang
2025-08-15 21:34 ` [PATCH V3 11/17] perf/x86: Add eGPRs into sample_regs kan.liang
2025-08-20 10:01 ` Mi, Dapeng
2025-08-15 21:34 ` [PATCH V3 12/17] perf/x86: Add SSP " kan.liang
2025-08-15 21:34 ` [PATCH V3 13/17] perf/x86/intel: Enable PERF_PMU_CAP_SIMD_REGS kan.liang
2025-08-15 21:34 ` [POC PATCH 14/17] perf/x86/regs: Only support legacy regs for the PT and PERF_REGS_MASK for now kan.liang
2025-08-25 9:07 ` Adrian Hunter
2025-08-15 21:34 ` [POC PATCH 15/17] tools headers: Sync with the kernel sources kan.liang
2025-08-15 21:34 ` [POC PATCH 16/17] perf parse-regs: Support the new SIMD format kan.liang
2025-08-20 10:04 ` Mi, Dapeng
2025-08-20 18:18 ` Liang, Kan
2025-08-21 3:35 ` Mi, Dapeng
2025-08-15 21:34 ` [POC PATCH 17/17] perf regs: Support the PERF_SAMPLE_REGS_ABI_SIMD kan.liang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250815213435.1702022-8-kan.liang@linux.intel.com \
--to=kan.liang@linux.intel.com \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=broonie@kernel.org \
--cc=dapeng1.mi@linux.intel.com \
--cc=dave.hansen@linux.intel.com \
--cc=eranian@google.com \
--cc=irogers@google.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=ravi.bangoria@amd.com \
--cc=tglx@linutronix.de \
--cc=zide.chen@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.