From: "Mi, Dapeng" <dapeng1.mi@linux.intel.com>
To: Ian Rogers <irogers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Adrian Hunter <adrian.hunter@intel.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Andi Kleen <ak@linux.intel.com>,
Eranian Stephane <eranian@google.com>,
linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
Dapeng Mi <dapeng1.mi@intel.com>, Zide Chen <zide.chen@intel.com>,
Falcon Thomas <thomas.falcon@intel.com>,
Xudong Hao <xudong.hao@intel.com>
Subject: Re: [Patch v9 12/12] perf/x86/intel: Add counter group support for arch-PEBS
Date: Tue, 10 Mar 2026 10:06:28 +0800 [thread overview]
Message-ID: <0e6df3b8-d9c7-4b0e-99d4-eb5dd73a5a5e@linux.intel.com> (raw)
In-Reply-To: <CAP-5=fUDfgSC9CcJWg9CpKQo4aTdFbD89_XcmVX8OivRLoEAcA@mail.gmail.com>
On 3/10/2026 6:59 AM, Ian Rogers wrote:
> On Wed, Oct 29, 2025 at 3:25 AM Dapeng Mi <dapeng1.mi@linux.intel.com> wrote:
>> Base on previous adaptive PEBS counter snapshot support, add counter
>> group support for architectural PEBS. Since arch-PEBS shares same
>> counter group layout with adaptive PEBS, directly reuse
>> __setup_pebs_counter_group() helper to process arch-PEBS counter group.
>>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>> ---
>> arch/x86/events/intel/core.c | 38 ++++++++++++++++++++++++++++---
>> arch/x86/events/intel/ds.c | 29 ++++++++++++++++++++---
>> arch/x86/include/asm/msr-index.h | 6 +++++
>> arch/x86/include/asm/perf_event.h | 13 ++++++++---
>> 4 files changed, 77 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 75cba28b86d5..cb64018321dd 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -3014,6 +3014,17 @@ static void intel_pmu_enable_event_ext(struct perf_event *event)
>>
>> if (pebs_data_cfg & PEBS_DATACFG_LBRS)
>> ext |= ARCH_PEBS_LBR & cap.caps;
>> +
>> + if (pebs_data_cfg &
>> + (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT))
>> + ext |= ARCH_PEBS_CNTR_GP & cap.caps;
>> +
>> + if (pebs_data_cfg &
>> + (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT))
>> + ext |= ARCH_PEBS_CNTR_FIXED & cap.caps;
>> +
>> + if (pebs_data_cfg & PEBS_DATACFG_METRICS)
>> + ext |= ARCH_PEBS_CNTR_METRICS & cap.caps;
>> }
>>
>> if (cpuc->n_pebs == cpuc->n_large_pebs)
>> @@ -3038,6 +3049,9 @@ static void intel_pmu_enable_event_ext(struct perf_event *event)
>> }
>> }
>>
>> + if (is_pebs_counter_event_group(event))
>> + ext |= ARCH_PEBS_CNTR_ALLOW;
>> +
>> if (cpuc->cfg_c_val[hwc->idx] != ext)
>> __intel_pmu_update_event_ext(hwc->idx, ext);
>> }
>> @@ -4323,6 +4337,20 @@ static bool intel_pmu_is_acr_group(struct perf_event *event)
>> return false;
>> }
>>
>> +static inline bool intel_pmu_has_pebs_counter_group(struct pmu *pmu)
>> +{
>> + u64 caps;
>> +
>> + if (x86_pmu.intel_cap.pebs_format >= 6 && x86_pmu.intel_cap.pebs_baseline)
>> + return true;
>> +
>> + caps = hybrid(pmu, arch_pebs_cap).caps;
>> + if (x86_pmu.arch_pebs && (caps & ARCH_PEBS_CNTR_MASK))
>> + return true;
>> +
>> + return false;
>> +}
>> +
>> static inline void intel_pmu_set_acr_cntr_constr(struct perf_event *event,
>> u64 *cause_mask, int *num)
>> {
>> @@ -4471,8 +4499,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
>> }
>>
>> if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
>> - (x86_pmu.intel_cap.pebs_format >= 6) &&
>> - x86_pmu.intel_cap.pebs_baseline &&
>> + intel_pmu_has_pebs_counter_group(event->pmu) &&
>> is_sampling_event(event) &&
>> event->attr.precise_ip)
>> event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
>> @@ -5420,6 +5447,8 @@ static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
>> x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
>> if (caps & ARCH_PEBS_LBR)
>> x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
>> + if (caps & ARCH_PEBS_CNTR_MASK)
>> + x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
>>
>> if (!(caps & ARCH_PEBS_AUX))
>> x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
>> @@ -7134,8 +7163,11 @@ __init int intel_pmu_init(void)
>> * Many features on and after V6 require dynamic constraint,
>> * e.g., Arch PEBS, ACR.
>> */
>> - if (version >= 6)
>> + if (version >= 6) {
>> x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
>> + x86_pmu.late_setup = intel_pmu_late_setup;
>> + }
>> +
>> /*
>> * Install the hw-cache-events table:
>> */
>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>> index c66e9b562de3..c93bf971d97b 100644
>> --- a/arch/x86/events/intel/ds.c
>> +++ b/arch/x86/events/intel/ds.c
>> @@ -1530,13 +1530,20 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
>>
>> u64 intel_get_arch_pebs_data_config(struct perf_event *event)
>> {
>> + struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>> u64 pebs_data_cfg = 0;
>> + u64 cntr_mask;
>>
>> if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
>> return 0;
>>
>> pebs_data_cfg |= pebs_update_adaptive_cfg(event);
>>
>> + cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) |
>> + (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) |
>> + PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS;
>> + pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask;
>> +
>> return pebs_data_cfg;
>> }
>>
>> @@ -2444,6 +2451,24 @@ static void setup_arch_pebs_sample_data(struct perf_event *event,
>> }
>> }
>>
>> + if (header->cntr) {
>> + struct arch_pebs_cntr_header *cntr = next_record;
>> + unsigned int nr;
>> +
>> + next_record += sizeof(struct arch_pebs_cntr_header);
>> +
>> + if (is_pebs_counter_event_group(event)) {
>> + __setup_pebs_counter_group(cpuc, event,
>> + (struct pebs_cntr_header *)cntr, next_record);
>> + data->sample_flags |= PERF_SAMPLE_READ;
>> + }
>> +
>> + nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
>> + if (cntr->metrics == INTEL_CNTR_METRICS)
>> + nr += 2;
>> + next_record += nr * sizeof(u64);
>> + }
>> +
>> /* Parse followed fragments if there are. */
>> if (arch_pebs_record_continued(header)) {
>> at = at + header->size;
>> @@ -3094,10 +3119,8 @@ static void __init intel_ds_pebs_init(void)
>> break;
>>
>> case 6:
>> - if (x86_pmu.intel_cap.pebs_baseline) {
>> + if (x86_pmu.intel_cap.pebs_baseline)
>> x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
>> - x86_pmu.late_setup = intel_pmu_late_setup;
>> - }
> Hi Dapeng,
>
> I'm trying to understand why the late_setup initialization was changed
> here and its connection with counter group support. I couldn't find a
> mention in the commit message.
It's because arch-PEBS also supports counter group sampling, not just the
legacy PEBS with PEBS format 6. Currently both ACR (auto counter reload) and
PEBS counter group sampling need the late_setup. Since ACR and counter group
sampling (regardless of legacy PEBS or arch-PEBS) were introduced with
Perfmon v6, the late_setup initialization is moved to the unified place in
the Perfmon v6 initialization. Thanks.
>
> Thanks,
> Ian
>
>> fallthrough;
>> case 5:
>> x86_pmu.pebs_ept = 1;
>> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
>> index f1ef9ac38bfb..65cc528fbad8 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -334,12 +334,18 @@
>> #define ARCH_PEBS_INDEX_WR_SHIFT 4
>>
>> #define ARCH_PEBS_RELOAD 0xffffffff
>> +#define ARCH_PEBS_CNTR_ALLOW BIT_ULL(35)
>> +#define ARCH_PEBS_CNTR_GP BIT_ULL(36)
>> +#define ARCH_PEBS_CNTR_FIXED BIT_ULL(37)
>> +#define ARCH_PEBS_CNTR_METRICS BIT_ULL(38)
>> #define ARCH_PEBS_LBR_SHIFT 40
>> #define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT)
>> #define ARCH_PEBS_VECR_XMM BIT_ULL(49)
>> #define ARCH_PEBS_GPR BIT_ULL(61)
>> #define ARCH_PEBS_AUX BIT_ULL(62)
>> #define ARCH_PEBS_EN BIT_ULL(63)
>> +#define ARCH_PEBS_CNTR_MASK (ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \
>> + ARCH_PEBS_CNTR_METRICS)
>>
>> #define MSR_IA32_RTIT_CTL 0x00000570
>> #define RTIT_CTL_TRACEEN BIT(0)
>> diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
>> index 3b3848f0d339..7276ba70c88a 100644
>> --- a/arch/x86/include/asm/perf_event.h
>> +++ b/arch/x86/include/asm/perf_event.h
>> @@ -141,16 +141,16 @@
>> #define ARCH_PERFMON_EVENTS_COUNT 7
>>
>> #define PEBS_DATACFG_MEMINFO BIT_ULL(0)
>> -#define PEBS_DATACFG_GP BIT_ULL(1)
>> +#define PEBS_DATACFG_GP BIT_ULL(1)
>> #define PEBS_DATACFG_XMMS BIT_ULL(2)
>> #define PEBS_DATACFG_LBRS BIT_ULL(3)
>> -#define PEBS_DATACFG_LBR_SHIFT 24
>> #define PEBS_DATACFG_CNTR BIT_ULL(4)
>> +#define PEBS_DATACFG_METRICS BIT_ULL(5)
>> +#define PEBS_DATACFG_LBR_SHIFT 24
>> #define PEBS_DATACFG_CNTR_SHIFT 32
>> #define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0)
>> #define PEBS_DATACFG_FIX_SHIFT 48
>> #define PEBS_DATACFG_FIX_MASK GENMASK_ULL(7, 0)
>> -#define PEBS_DATACFG_METRICS BIT_ULL(5)
>>
>> /* Steal the highest bit of pebs_data_cfg for SW usage */
>> #define PEBS_UPDATE_DS_SW BIT_ULL(63)
>> @@ -603,6 +603,13 @@ struct arch_pebs_lbr_header {
>> u64 ler_info;
>> };
>>
>> +struct arch_pebs_cntr_header {
>> + u32 cntr;
>> + u32 fixed;
>> + u32 metrics;
>> + u32 reserved;
>> +};
>> +
>> /*
>> * AMD Extended Performance Monitoring and Debug cpuid feature detection
>> */
>> --
>> 2.34.1
>>
next prev parent reply other threads:[~2026-03-10 2:06 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-10-29 10:21 [Patch v9 00/12] arch-PEBS enabling for Intel platforms Dapeng Mi
2025-10-29 10:21 ` [Patch v9 01/12] perf/x86: Remove redundant is_x86_event() prototype Dapeng Mi
2025-10-29 10:21 ` [Patch v9 02/12] perf/x86: Fix NULL event access and potential PEBS record loss Dapeng Mi
2025-11-06 14:19 ` Peter Zijlstra
2025-10-29 10:21 ` [Patch v9 03/12] perf/x86/intel: Replace x86_pmu.drain_pebs calling with static call Dapeng Mi
2025-10-29 10:21 ` [Patch v9 04/12] perf/x86/intel: Correct large PEBS flag check Dapeng Mi
2025-10-29 10:21 ` [Patch v9 05/12] perf/x86/intel: Initialize architectural PEBS Dapeng Mi
2026-03-05 0:50 ` Ian Rogers
2026-03-06 1:38 ` Mi, Dapeng
2025-10-29 10:21 ` [Patch v9 06/12] perf/x86/intel/ds: Factor out PEBS record processing code to functions Dapeng Mi
2025-10-29 10:21 ` [Patch v9 07/12] perf/x86/intel/ds: Factor out PEBS group " Dapeng Mi
2025-10-29 10:21 ` [Patch v9 08/12] perf/x86/intel: Process arch-PEBS records or record fragments Dapeng Mi
2026-03-03 0:20 ` Chun-Tse Shao
2026-03-06 1:20 ` Mi, Dapeng
2025-10-29 10:21 ` [Patch v9 09/12] perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR Dapeng Mi
2025-10-29 10:21 ` [Patch v9 10/12] perf/x86/intel: Update dyn_constranit base on PEBS event precise level Dapeng Mi
2025-11-06 14:52 ` Peter Zijlstra
2025-11-07 6:11 ` Mi, Dapeng
2025-11-07 8:28 ` Peter Zijlstra
2025-11-07 8:36 ` Mi, Dapeng
2025-11-07 13:05 ` Peter Zijlstra
2025-11-10 0:23 ` Mi, Dapeng
2025-11-10 9:03 ` Peter Zijlstra
2025-11-10 9:15 ` Mi, Dapeng
2025-11-11 5:41 ` Mi, Dapeng
2025-11-11 11:37 ` Peter Zijlstra
2025-11-12 0:16 ` Mi, Dapeng
2025-10-29 10:21 ` [Patch v9 11/12] perf/x86/intel: Setup PEBS data configuration and enable legacy groups Dapeng Mi
2026-03-05 1:20 ` Ian Rogers
2026-03-06 2:17 ` Mi, Dapeng
2025-10-29 10:21 ` [Patch v9 12/12] perf/x86/intel: Add counter group support for arch-PEBS Dapeng Mi
2026-03-09 22:59 ` Ian Rogers
2026-03-10 2:06 ` Mi, Dapeng [this message]
2026-03-10 4:36 ` Ian Rogers
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=0e6df3b8-d9c7-4b0e-99d4-eb5dd73a5a5e@linux.intel.com \
--to=dapeng1.mi@linux.intel.com \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=dapeng1.mi@intel.com \
--cc=eranian@google.com \
--cc=irogers@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
--cc=thomas.falcon@intel.com \
--cc=xudong.hao@intel.com \
--cc=zide.chen@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.