All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Mi, Dapeng" <dapeng1.mi@linux.intel.com>
To: kan.liang@linux.intel.com, peterz@infradead.org,
	mingo@redhat.com, acme@kernel.org, namhyung@kernel.org,
	tglx@linutronix.de, dave.hansen@linux.intel.com,
	irogers@google.com, adrian.hunter@intel.com, jolsa@kernel.org,
	alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, zide.chen@intel.com, mark.rutland@arm.com,
	broonie@kernel.org, ravi.bangoria@amd.com, eranian@google.com
Subject: Re: [PATCH V3 11/17] perf/x86: Add eGPRs into sample_regs
Date: Wed, 20 Aug 2025 18:01:48 +0800	[thread overview]
Message-ID: <0858437c-e088-4e08-86fa-7ef08fd314a6@linux.intel.com> (raw)
In-Reply-To: <20250815213435.1702022-12-kan.liang@linux.intel.com>


On 8/16/2025 5:34 AM, kan.liang@linux.intel.com wrote:
> From: Kan Liang <kan.liang@linux.intel.com>
>
> The eGPRs are only supported when the new SIMD registers configuration
> method is used, which moves the XMM registers to sample_simd_vec_regs.
> So the XMM space in sample_regs can be reclaimed for the eGPRs.
>
> The eGPRs are retrieved by XSAVE. The eGPRs are only supported on X86_64.
>
> Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
> ---
>  arch/x86/events/core.c                | 39 +++++++++++++++++++++------
>  arch/x86/include/asm/perf_event.h     |  4 +++
>  arch/x86/include/uapi/asm/perf_regs.h | 26 ++++++++++++++++--
>  arch/x86/kernel/perf_regs.c           | 31 ++++++++++-----------
>  4 files changed, 75 insertions(+), 25 deletions(-)
>
> diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
> index 1fa550efcdfa..f816290defc1 100644
> --- a/arch/x86/events/core.c
> +++ b/arch/x86/events/core.c
> @@ -432,6 +432,8 @@ static void x86_pmu_get_ext_regs(struct x86_perf_regs *perf_regs, u64 mask)
>  		perf_regs->h16zmm = get_xsave_addr(xsave, XFEATURE_Hi16_ZMM);
>  	if (valid_mask & XFEATURE_MASK_OPMASK)
>  		perf_regs->opmask = get_xsave_addr(xsave, XFEATURE_OPMASK);
> +	if (valid_mask & XFEATURE_MASK_APX)
> +		perf_regs->egpr = get_xsave_addr(xsave, XFEATURE_APX);
>  }
>  
>  static void release_ext_regs_buffers(void)
> @@ -709,17 +711,33 @@ int x86_pmu_hw_config(struct perf_event *event)
>  	}
>  
>  	if (event->attr.sample_type & (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)) {
> -		/*
> -		 * Besides the general purpose registers, XMM registers may
> -		 * be collected as well.
> -		 */
> -		if (event_has_extended_regs(event)) {
> -			if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS))
> +		if (event->attr.sample_simd_regs_enabled) {
> +			u64 reserved = ~GENMASK_ULL(PERF_REG_X86_64_MAX - 1, 0);
> +
> +			if (!(event->pmu->capabilities & PERF_PMU_CAP_SIMD_REGS))
>  				return -EINVAL;
> -			if (!(x86_pmu.ext_regs_mask & XFEATURE_MASK_SSE))
> +			/*
> +			 * The XMM space in the perf_event_x86_regs is reclaimed
> +			 * for eGPRs and other general registers.
> +			 */
> +			if (event->attr.sample_regs_user & reserved ||
> +			    event->attr.sample_regs_intr & reserved)
>  				return -EINVAL;
> -			if (event->attr.sample_simd_regs_enabled)
> +			if ((event->attr.sample_regs_user & PERF_X86_EGPRS_MASK ||
> +			     event->attr.sample_regs_intr & PERF_X86_EGPRS_MASK) &&
> +			     !(x86_pmu.ext_regs_mask & XFEATURE_MASK_APX))
>  				return -EINVAL;
> +		} else {
> +			/*
> +			 * Besides the general purpose registers, XMM registers may
> +			 * be collected as well.
> +			 */
> +			if (event_has_extended_regs(event)) {
> +				if (!(event->pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS))
> +					return -EINVAL;
> +				if (!(x86_pmu.ext_regs_mask & XFEATURE_MASK_SSE))
> +					return -EINVAL;
> +			}
>  		}
>  
>  		if (event_has_simd_regs(event)) {
> @@ -1881,6 +1899,11 @@ void x86_pmu_setup_regs_data(struct perf_event *event,
>  			perf_regs->opmask_regs = NULL;
>  			mask |= XFEATURE_MASK_OPMASK;
>  		}
> +		if (attr->sample_regs_user & PERF_X86_EGPRS_MASK ||
> +		    attr->sample_regs_intr & PERF_X86_EGPRS_MASK) {
> +			perf_regs->egpr_regs = NULL;
> +			mask |= XFEATURE_MASK_APX;
> +		}
>  	}
>  
>  	mask &= ~ignore_mask;
> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
> index dda677022882..4400cb66bc8e 100644
> --- a/arch/x86/include/asm/perf_event.h
> +++ b/arch/x86/include/asm/perf_event.h
> @@ -613,6 +613,10 @@ struct x86_perf_regs {
>  		u64	*opmask_regs;
>  		struct avx_512_opmask_state *opmask;
>  	};
> +	union {
> +		u64	*egpr_regs;
> +		struct apx_state *egpr;
> +	};
>  };
>  
>  extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
> diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
> index dd7bd1dd8d39..cd0f6804debf 100644
> --- a/arch/x86/include/uapi/asm/perf_regs.h
> +++ b/arch/x86/include/uapi/asm/perf_regs.h
> @@ -27,11 +27,31 @@ enum perf_event_x86_regs {
>  	PERF_REG_X86_R13,
>  	PERF_REG_X86_R14,
>  	PERF_REG_X86_R15,
> +	/* Extended GPRs (EGPRs) */
> +	PERF_REG_X86_R16,
> +	PERF_REG_X86_R17,
> +	PERF_REG_X86_R18,
> +	PERF_REG_X86_R19,
> +	PERF_REG_X86_R20,
> +	PERF_REG_X86_R21,
> +	PERF_REG_X86_R22,
> +	PERF_REG_X86_R23,
> +	PERF_REG_X86_R24,
> +	PERF_REG_X86_R25,
> +	PERF_REG_X86_R26,
> +	PERF_REG_X86_R27,
> +	PERF_REG_X86_R28,
> +	PERF_REG_X86_R29,
> +	PERF_REG_X86_R30,
> +	PERF_REG_X86_R31,
>  	/* These are the limits for the GPRs. */
>  	PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
> -	PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
> +	PERF_REG_X86_64_MAX = PERF_REG_X86_R31 + 1,
>  
> -	/* These all need two bits set because they are 128bit */
> +	/*
> +	 * These all need two bits set because they are 128bit.
> +	 * These are only available when !PERF_SAMPLE_REGS_ABI_SIMD
> +	 */

The eGPR indexes overlap with the XMM indexes. Users may be confused by
this, so we'd better add a comment to explain it.


>  	PERF_REG_X86_XMM0  = 32,
>  	PERF_REG_X86_XMM1  = 34,
>  	PERF_REG_X86_XMM2  = 36,
> @@ -55,6 +75,8 @@ enum perf_event_x86_regs {
>  
>  #define PERF_REG_EXTENDED_MASK	(~((1ULL << PERF_REG_X86_XMM0) - 1))
>  
> +#define PERF_X86_EGPRS_MASK		GENMASK_ULL(PERF_REG_X86_R31, PERF_REG_X86_R16)
> +
>  #define PERF_X86_SIMD_PRED_REGS_MAX	8
>  #define PERF_X86_SIMD_PRED_MASK		GENMASK(PERF_X86_SIMD_PRED_REGS_MAX - 1, 0)
>  #define PERF_X86_SIMD_VEC_REGS_MAX	32
> diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
> index 5e815f806605..b6e50194ff3e 100644
> --- a/arch/x86/kernel/perf_regs.c
> +++ b/arch/x86/kernel/perf_regs.c
> @@ -83,14 +83,22 @@ u64 perf_reg_value(struct pt_regs *regs, int idx)
>  {
>  	struct x86_perf_regs *perf_regs;
>  
> -	if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
> +	if (idx > PERF_REG_X86_R15) {
>  		perf_regs = container_of(regs, struct x86_perf_regs, regs);
> -		/* SIMD registers are moved to dedicated sample_simd_vec_reg */
> -		if (perf_regs->abi & PERF_SAMPLE_REGS_ABI_SIMD)
> -			return 0;
> -		if (!perf_regs->xmm_regs)
> -			return 0;
> -		return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
> +
> +		if (perf_regs->abi & PERF_SAMPLE_REGS_ABI_SIMD) {
> +			if (idx <= PERF_REG_X86_R31) {
> +				if (!perf_regs->egpr_regs)
> +					return 0;
> +				return perf_regs->egpr_regs[idx - PERF_REG_X86_R16];
> +			}
> +		} else {
> +			if (idx >= PERF_REG_X86_XMM0 && idx < PERF_REG_X86_XMM_MAX) {
> +				if (!perf_regs->xmm_regs)
> +					return 0;
> +				return perf_regs->xmm_regs[idx - PERF_REG_X86_XMM0];
> +			}
> +		}
>  	}
>  
>  	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
> @@ -171,14 +179,7 @@ int perf_simd_reg_validate(u16 vec_qwords, u64 vec_mask,
>  				 ~((1ULL << PERF_REG_X86_MAX) - 1))
>  
>  #ifdef CONFIG_X86_32
> -#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_R8) | \
> -		       (1ULL << PERF_REG_X86_R9) | \
> -		       (1ULL << PERF_REG_X86_R10) | \
> -		       (1ULL << PERF_REG_X86_R11) | \
> -		       (1ULL << PERF_REG_X86_R12) | \
> -		       (1ULL << PERF_REG_X86_R13) | \
> -		       (1ULL << PERF_REG_X86_R14) | \
> -		       (1ULL << PERF_REG_X86_R15))
> +#define REG_NOSUPPORT GENMASK_ULL(PERF_REG_X86_R31, PERF_REG_X86_R8)
>  
>  int perf_reg_validate(u64 mask)
>  {

  reply	other threads:[~2025-08-20 10:01 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-15 21:34 [PATCH V3 00/17] Support vector and more extended registers in perf kan.liang
2025-08-15 21:34 ` [PATCH V3 01/17] perf/x86: Use x86_perf_regs in the x86 nmi handler kan.liang
2025-08-15 21:34 ` [PATCH V3 02/17] perf/x86: Setup the regs data kan.liang
2025-08-15 21:34 ` [PATCH V3 03/17] x86/fpu/xstate: Add xsaves_nmi kan.liang
2025-08-15 21:34 ` [PATCH V3 04/17] perf: Move has_extended_regs() to header file kan.liang
2025-08-15 21:34 ` [PATCH V3 05/17] perf/x86: Support XMM register for non-PEBS and REGS_USER kan.liang
2025-08-19 13:39   ` Peter Zijlstra
2025-08-19 15:55     ` Liang, Kan
2025-08-20  9:46       ` Mi, Dapeng
2025-08-20 18:03         ` Liang, Kan
2025-08-21  1:00           ` Mi, Dapeng
2025-08-15 21:34 ` [PATCH V3 06/17] perf: Support SIMD registers kan.liang
2025-08-20  9:55   ` Mi, Dapeng
2025-08-20 18:08     ` Liang, Kan
2025-08-15 21:34 ` [PATCH V3 07/17] perf/x86: Move XMM to sample_simd_vec_regs kan.liang
2025-08-15 21:34 ` [PATCH V3 08/17] perf/x86: Add YMM into sample_simd_vec_regs kan.liang
2025-08-20  9:59   ` Mi, Dapeng
2025-08-20 18:10     ` Liang, Kan
2025-08-15 21:34 ` [PATCH V3 09/17] perf/x86: Add ZMM " kan.liang
2025-08-15 21:34 ` [PATCH V3 10/17] perf/x86: Add OPMASK into sample_simd_pred_reg kan.liang
2025-08-15 21:34 ` [PATCH V3 11/17] perf/x86: Add eGPRs into sample_regs kan.liang
2025-08-20 10:01   ` Mi, Dapeng [this message]
2025-08-15 21:34 ` [PATCH V3 12/17] perf/x86: Add SSP " kan.liang
2025-08-15 21:34 ` [PATCH V3 13/17] perf/x86/intel: Enable PERF_PMU_CAP_SIMD_REGS kan.liang
2025-08-15 21:34 ` [POC PATCH 14/17] perf/x86/regs: Only support legacy regs for the PT and PERF_REGS_MASK for now kan.liang
2025-08-25  9:07   ` Adrian Hunter
2025-08-15 21:34 ` [POC PATCH 15/17] tools headers: Sync with the kernel sources kan.liang
2025-08-15 21:34 ` [POC PATCH 16/17] perf parse-regs: Support the new SIMD format kan.liang
2025-08-20 10:04   ` Mi, Dapeng
2025-08-20 18:18     ` Liang, Kan
2025-08-21  3:35   ` Mi, Dapeng
2025-08-15 21:34 ` [POC PATCH 17/17] perf regs: Support the PERF_SAMPLE_REGS_ABI_SIMD kan.liang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0858437c-e088-4e08-86fa-7ef08fd314a6@linux.intel.com \
    --to=dapeng1.mi@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=broonie@kernel.org \
    --cc=dave.hansen@linux.intel.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=ravi.bangoria@amd.com \
    --cc=tglx@linutronix.de \
    --cc=zide.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.