All of lore.kernel.org
 help / color / mirror / Atom feed
From: Dapeng Mi <dapeng1.mi@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	Ian Rogers <irogers@google.com>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Jiri Olsa <jolsa@kernel.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Andi Kleen <ak@linux.intel.com>,
	Eranian Stephane <eranian@google.com>
Cc: Mark Rutland <mark.rutland@arm.com>,
	broonie@kernel.org, Ravi Bangoria <ravi.bangoria@amd.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Zide Chen <zide.chen@intel.com>,
	Falcon Thomas <thomas.falcon@intel.com>,
	Dapeng Mi <dapeng1.mi@intel.com>,
	Xudong Hao <xudong.hao@intel.com>,
	Dapeng Mi <dapeng1.mi@linux.intel.com>,
	Kan Liang <kan.liang@linux.intel.com>
Subject: [Patch v8 18/23] perf/x86: Support eGPRs sampling using sample_regs_* fields
Date: Fri, 29 May 2026 15:56:40 +0800	[thread overview]
Message-ID: <20260529075645.580362-19-dapeng1.mi@linux.intel.com> (raw)
In-Reply-To: <20260529075645.580362-1-dapeng1.mi@linux.intel.com>

Add support for sampling APX eGPRs (R16 ~ R31) via the
sample_regs_* fields.

To sample eGPRs, the sample_simd_regs_enabled field must be set. This
allows the spare space (reclaimed from the original XMM space) in the
sample_regs_* fields to be used for representing eGPRs.

The perf_reg_value() function must first check whether the
PERF_SAMPLE_REGS_ABI_SIMD flag is set, and then determine whether
to output eGPRs or legacy XMM registers to userspace.

The perf_reg_validate() function first checks the simd_enabled argument
to determine if the eGPRs bitmap is represented in sample_regs_* fields.
It then validates the eGPRs bitmap accordingly.

Currently, eGPRs sampling is only supported on the x86_64 architecture, as
APX is only available on x86_64 platforms.

Please note that eGPRs sampling is not enabled yet; it will be enabled
in a later patch when PERF_PMU_CAP_SIMD_REGS is set.

Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
 arch/x86/events/core.c                | 43 ++++++++++++++++++---------
 arch/x86/events/intel/core.c          |  4 ++-
 arch/x86/events/perf_event.h          | 10 +++++++
 arch/x86/include/asm/perf_event.h     |  4 +++
 arch/x86/include/uapi/asm/perf_regs.h | 26 ++++++++++++++++
 arch/x86/kernel/perf_regs.c           | 43 ++++++++++++++++-----------
 6 files changed, 98 insertions(+), 32 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index d4516d3b5d5a..af874ff3d048 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -708,26 +708,24 @@ int x86_pmu_hw_config(struct perf_event *event)
 	}
 
 	if (event->attr.sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) {
-		/*
-		 * Besides the general purpose registers, XMM registers may
-		 * be collected as well.
-		 */
-		if (event_has_extended_regs(event)) {
-			if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS))
-				return -EINVAL;
-			if (is_sampling_event(event) && !event->attr.precise_ip &&
-			    !this_cpu_has(X86_FEATURE_XSAVES))
-				return -EINVAL;
-			if (event->attr.sample_simd_regs_enabled)
-				return -EINVAL;
-		}
-
 		if (event_has_simd_regs(event)) {
+			u64 reserved = ~GENMASK_ULL(PERF_REG_MISC_MAX - 1, 0);
+
 			if (!(event->pmu->capabilities & PERF_PMU_CAP_SIMD_REGS))
 				return -EINVAL;
 			if (is_sampling_event(event) && !event->attr.precise_ip &&
 			    !this_cpu_has(X86_FEATURE_XSAVES))
 				return -EINVAL;
+			/*
+			 * The XMM space in the perf_event_x86_regs is reclaimed
+			 * for eGPRs and other general registers.
+			 */
+			if ((event->attr.sample_regs_user & reserved) ||
+			    (event->attr.sample_regs_intr & reserved))
+				return -EINVAL;
+			if (event_needs_egprs(event) &&
+			    !(x86_pmu.ext_regs_mask & XFEATURE_MASK_APX))
+				return -EINVAL;
 			/* The vector registers set is not supported */
 			if (event_needs_xmm(event) &&
 			    !(x86_pmu.ext_regs_mask & XFEATURE_MASK_SSE))
@@ -744,6 +742,18 @@ int x86_pmu_hw_config(struct perf_event *event)
 			if (event_needs_opmask(event) &&
 			    !(x86_pmu.ext_regs_mask & XFEATURE_MASK_OPMASK))
 				return -EINVAL;
+		} else {
+			/*
+			 * Besides the general purpose registers, XMM registers may
+			 * be collected as well.
+			 */
+			if (event_has_extended_regs(event)) {
+				if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS))
+					return -EINVAL;
+				if (is_sampling_event(event) && !event->attr.precise_ip &&
+				    !this_cpu_has(X86_FEATURE_XSAVES))
+					return -EINVAL;
+			}
 		}
 	}
 
@@ -1792,6 +1802,7 @@ void x86_pmu_clear_perf_regs(struct pt_regs *regs)
 	perf_regs->zmmh_regs = NULL;
 	perf_regs->h16zmm_regs = NULL;
 	perf_regs->opmask_regs = NULL;
+	perf_regs->egpr_regs = NULL;
 }
 
 static void update_perf_regs(struct x86_perf_regs *perf_regs,
@@ -1815,6 +1826,8 @@ static void update_perf_regs(struct x86_perf_regs *perf_regs,
 		perf_regs->h16zmm = get_xsave_addr(xsave, XFEATURE_Hi16_ZMM);
 	if (mask & XFEATURE_MASK_OPMASK)
 		perf_regs->opmask = get_xsave_addr(xsave, XFEATURE_OPMASK);
+	if (mask & XFEATURE_MASK_APX)
+		perf_regs->egpr = get_xsave_addr(xsave, XFEATURE_APX);
 }
 
 /*
@@ -1999,6 +2012,8 @@ static void x86_pmu_sample_xregs(struct perf_event *event,
 		mask |= XFEATURE_MASK_Hi16_ZMM;
 	if (event_needs_opmask(event))
 		mask |= XFEATURE_MASK_OPMASK;
+	if (event_needs_egprs(event))
+		mask |= XFEATURE_MASK_APX;
 
 	mask &= x86_pmu.ext_regs_mask;
 	if (sample_type & PERF_SAMPLE_REGS_USER) {
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 6c06558c416f..a2473f962681 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4698,7 +4698,9 @@ static void intel_pebs_aliases_skl(struct perf_event *event)
 static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
 {
 	unsigned long flags = x86_pmu.large_pebs_flags;
-	u64 gprs_mask = PEBS_GP_REGS | PERF_REG_EXTENDED_MASK;
+	u64 gprs_mask = event->attr.sample_simd_regs_enabled ?
+			PEBS_GP_REGS :
+			PEBS_GP_REGS | PERF_REG_EXTENDED_MASK;
 
 	if (event->attr.use_clockid)
 		flags &= ~PERF_SAMPLE_TIME;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 22b846999cfa..4cc490aa04fc 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -197,6 +197,16 @@ static inline bool event_needs_opmask(struct perf_event *event)
 	return false;
 }
 
+static inline bool event_needs_egprs(struct perf_event *event)
+{
+	if (event->attr.sample_simd_regs_enabled &&
+	    (event->attr.sample_regs_user & PERF_X86_EGPRS_MASK ||
+	     event->attr.sample_regs_intr & PERF_X86_EGPRS_MASK))
+		return true;
+
+	return false;
+}
+
 struct amd_nb {
 	int nb_id;  /* NorthBridge id */
 	int refcnt; /* reference count */
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 7e8b60bddd5a..a54ea8fa6a04 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -747,6 +747,10 @@ struct x86_perf_regs {
 		u64	*opmask_regs;
 		struct avx_512_opmask_state *opmask;
 	};
+	union {
+		u64	*egpr_regs;
+		struct apx_state *egpr;
+	};
 };
 
 extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index 24c981ba8bae..8774a1290fbe 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -27,9 +27,34 @@ enum perf_event_x86_regs {
 	PERF_REG_X86_R13,
 	PERF_REG_X86_R14,
 	PERF_REG_X86_R15,
+	/*
+	 * The eGPRs and XMM have overlaps. Only one can be used
+	 * at a time. The ABI PERF_SAMPLE_REGS_ABI_SIMD is used to
+	 * distinguish which one is used. If PERF_SAMPLE_REGS_ABI_SIMD
+	 * is set, then eGPRs is used, otherwise, XMM is used.
+	 *
+	 * Extended GPRs (eGPRs)
+	 */
+	PERF_REG_X86_R16,
+	PERF_REG_X86_R17,
+	PERF_REG_X86_R18,
+	PERF_REG_X86_R19,
+	PERF_REG_X86_R20,
+	PERF_REG_X86_R21,
+	PERF_REG_X86_R22,
+	PERF_REG_X86_R23,
+	PERF_REG_X86_R24,
+	PERF_REG_X86_R25,
+	PERF_REG_X86_R26,
+	PERF_REG_X86_R27,
+	PERF_REG_X86_R28,
+	PERF_REG_X86_R29,
+	PERF_REG_X86_R30,
+	PERF_REG_X86_R31,
 	/* These are the limits for the GPRs. */
 	PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
 	PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
+	PERF_REG_MISC_MAX = PERF_REG_X86_R31 + 1,
 
 	/* These all need two bits set because they are 128bit */
 	PERF_REG_X86_XMM0  = 32,
@@ -54,6 +79,7 @@ enum perf_event_x86_regs {
 };
 
 #define PERF_REG_EXTENDED_MASK	(~((1ULL << PERF_REG_X86_XMM0) - 1))
+#define PERF_X86_EGPRS_MASK	__GENMASK_ULL(PERF_REG_X86_R31, PERF_REG_X86_R16)
 
 enum {
 	PERF_X86_SIMD_XMM_REGS      = 16,
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 79803b3b6d6b..006883ad443d 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -61,14 +61,24 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
 	struct x86_perf_regs *perf_regs;
 
-	if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
+	if (idx > PERF_REG_X86_R15) {
 		perf_regs = container_of(regs, struct x86_perf_regs, regs);
-		/* SIMD registers are moved to dedicated sample_simd_vec_reg */
-		if (perf_regs->abi & PERF_SAMPLE_REGS_ABI_SIMD)
+		if (perf_regs->abi == PERF_SAMPLE_REGS_ABI_NONE)
 			return 0;
-		if (!perf_regs->xmm_regs)
-			return 0;
-		return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
+
+		if (perf_regs->abi & PERF_SAMPLE_REGS_ABI_SIMD) {
+			if (idx <= PERF_REG_X86_R31) {
+				if (!perf_regs->egpr_regs)
+					return 0;
+				return perf_regs->egpr_regs[idx - PERF_REG_X86_R16];
+			}
+		} else {
+			if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
+				if (!perf_regs->xmm_regs)
+					return 0;
+				return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
+			}
+		}
 	}
 
 	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
@@ -179,18 +189,12 @@ int perf_simd_reg_validate(u16 simd_enabled, u16 vec_qwords,
 	return 0;
 }
 
-#define PERF_REG_X86_RESERVED	(((1ULL << PERF_REG_X86_XMM0) - 1) & \
-				 ~((1ULL << PERF_REG_X86_MAX) - 1))
+#define PERF_REG_X86_RESERVED	(GENMASK_ULL(PERF_REG_X86_XMM0 - 1, PERF_REG_X86_AX) & \
+				 ~GENMASK_ULL(PERF_REG_X86_R15, PERF_REG_X86_AX))
+#define PERF_REG_X86_EXT_RESERVED	(~GENMASK_ULL(PERF_REG_MISC_MAX - 1, PERF_REG_X86_AX))
 
 #ifdef CONFIG_X86_32
-#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
-		       (1ULL << PERF_REG_X86_R9) | \
-		       (1ULL << PERF_REG_X86_R10) | \
-		       (1ULL << PERF_REG_X86_R11) | \
-		       (1ULL << PERF_REG_X86_R12) | \
-		       (1ULL << PERF_REG_X86_R13) | \
-		       (1ULL << PERF_REG_X86_R14) | \
-		       (1ULL << PERF_REG_X86_R15))
+#define REG_NOSUPPORT GENMASK_ULL(PERF_REG_X86_R15, PERF_REG_X86_R8)
 
 int perf_reg_validate(u64 mask, bool simd_enabled)
 {
@@ -214,8 +218,13 @@ u64 perf_reg_abi(struct task_struct *task)
 
 int perf_reg_validate(u64 mask, bool simd_enabled)
 {
+	if (!simd_enabled &&
+	    (!mask || (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))))
+		return -EINVAL;
+
 	/* The mask could be 0 if only the SIMD registers are interested */
-	if (mask & (REG_NOSUPPORT | PERF_REG_X86_RESERVED))
+	if (simd_enabled &&
+	    (mask & (REG_NOSUPPORT | PERF_REG_X86_EXT_RESERVED)))
 		return -EINVAL;
 
 	return 0;
-- 
2.34.1


  parent reply	other threads:[~2026-05-29  8:04 UTC|newest]

Thread overview: 62+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-29  7:56 [Patch v8 00/23] Support SIMD/eGPRs/SSP registers sampling for perf Dapeng Mi
2026-05-29  7:56 ` [Patch v8 01/23] perf/x86/intel: Validate return value of intel_pmu_init_hybrid() Dapeng Mi
2026-05-29  8:53   ` sashiko-bot
2026-05-29 11:11   ` Peter Zijlstra
2026-06-01  1:02     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 02/23] perf/x86: Move hybrid PMU initialization before x86_pmu_starting_cpu() Dapeng Mi
2026-05-29  8:51   ` sashiko-bot
2026-06-01  1:40     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 03/23] perf/x86/intel: Enable large PEBS sampling for XMMs Dapeng Mi
2026-05-29  7:56 ` [Patch v8 04/23] perf/x86/intel: Convert x86_perf_regs to per-cpu variables Dapeng Mi
2026-05-29  7:56 ` [Patch v8 05/23] perf: Eliminate duplicate arch-specific functions definations Dapeng Mi
2026-05-29  7:56 ` [Patch v8 06/23] perf/x86: Use x86_perf_regs in the x86 nmi handlers Dapeng Mi
2026-05-29  7:56 ` [Patch v8 07/23] x86/fpu/xstate: Add xsaves_nmi() helper Dapeng Mi
2026-05-29  8:56   ` sashiko-bot
2026-05-29 11:32   ` Peter Zijlstra
2026-06-01  2:31     ` Mi, Dapeng
2026-06-01  8:28       ` Peter Zijlstra
2026-05-29  7:56 ` [Patch v8 08/23] x86/fpu: Ensure TIF_NEED_FPU_LOAD is set after saving FPU state Dapeng Mi
2026-05-29  7:56 ` [Patch v8 09/23] perf: Move and enhance has_extended_regs() for arch-specific use Dapeng Mi
2026-05-29  7:56 ` [Patch v8 10/23] perf/x86: Enable XMM Register Sampling for Non-PEBS Events Dapeng Mi
2026-05-29  9:02   ` sashiko-bot
2026-06-01  3:11     ` Mi, Dapeng
2026-05-29 11:38   ` Peter Zijlstra
2026-06-01  3:04     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 11/23] perf/x86: Enable XMM register sampling for REGS_USER case Dapeng Mi
2026-05-29  9:24   ` sashiko-bot
2026-06-01  5:57     ` Mi, Dapeng
2026-05-29 11:42   ` Peter Zijlstra
2026-06-01  5:53     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 12/23] perf: Add sampling support for SIMD registers Dapeng Mi
2026-05-29  8:36   ` sashiko-bot
2026-06-01  6:44     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 13/23] perf/x86: Support XMM sampling using sample_simd_vec_reg_* fields Dapeng Mi
2026-05-29  8:49   ` sashiko-bot
2026-06-01  6:57     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 14/23] perf/x86: Support YMM " Dapeng Mi
2026-05-29  8:47   ` sashiko-bot
2026-06-01  7:14     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 15/23] perf/x86: Support ZMM " Dapeng Mi
2026-05-29  7:56 ` [Patch v8 16/23] perf/x86: Support OPMASK sampling using sample_simd_pred_reg_* fields Dapeng Mi
2026-05-29  9:21   ` sashiko-bot
2026-06-01  7:21     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 17/23] perf: Enhance perf_reg_validate() with simd_enabled argument Dapeng Mi
2026-05-29  7:56 ` Dapeng Mi [this message]
2026-05-29  9:31   ` [Patch v8 18/23] perf/x86: Support eGPRs sampling using sample_regs_* fields sashiko-bot
2026-06-01  8:20     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 19/23] perf/x86: Support SSP " Dapeng Mi
2026-05-29 10:03   ` sashiko-bot
2026-06-01  8:54     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 20/23] perf/x86/intel: Support arch-PEBS based SIMD/eGPRs/SSP sampling Dapeng Mi
2026-05-29  9:45   ` sashiko-bot
2026-06-01  9:08     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 21/23] perf/x86/intel: Enable PERF_PMU_CAP_SIMD_REGS capability Dapeng Mi
2026-05-29 10:43   ` sashiko-bot
2026-06-01  9:19     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 22/23] perf/x86: Activate back-to-back NMI detection for arch-PEBS induced NMIs Dapeng Mi
2026-05-29  9:34   ` sashiko-bot
2026-06-01  9:23     ` Mi, Dapeng
2026-05-29  7:56 ` [Patch v8 23/23] perf/x86/intel: Add sanity check for PEBS fragment size Dapeng Mi
2026-05-29  9:54   ` sashiko-bot
2026-06-01  9:42     ` Mi, Dapeng
2026-05-29  8:32 ` [Patch v8 00/23] Support SIMD/eGPRs/SSP registers sampling for perf Mi, Dapeng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260529075645.580362-19-dapeng1.mi@linux.intel.com \
    --to=dapeng1.mi@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=broonie@kernel.org \
    --cc=dapeng1.mi@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=ravi.bangoria@amd.com \
    --cc=tglx@linutronix.de \
    --cc=thomas.falcon@intel.com \
    --cc=xudong.hao@intel.com \
    --cc=zide.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.