public inbox for linux-perf-users@vger.kernel.org
 help / color / mirror / Atom feed
From: "Mi, Dapeng" <dapeng1.mi@linux.intel.com>
To: Ian Rogers <irogers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Arnaldo Carvalho de Melo <acme@kernel.org>,
	Namhyung Kim <namhyung@kernel.org>,
	Adrian Hunter <adrian.hunter@intel.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Andi Kleen <ak@linux.intel.com>,
	Eranian Stephane <eranian@google.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Dapeng Mi <dapeng1.mi@intel.com>, Zide Chen <zide.chen@intel.com>,
	Falcon Thomas <thomas.falcon@intel.com>,
	Xudong Hao <xudong.hao@intel.com>,
	Kan Liang <kan.liang@linux.intel.com>
Subject: Re: [Patch v9 11/12] perf/x86/intel: Setup PEBS data configuration and enable legacy groups
Date: Fri, 6 Mar 2026 10:17:26 +0800	[thread overview]
Message-ID: <59e94944-78be-4d76-9445-6095093e0742@linux.intel.com> (raw)
In-Reply-To: <CAP-5=fUPw6qs9i9NoxHgnc1wW3MhQqO5KxRnn2S8sDuX5ynUSQ@mail.gmail.com>


On 3/5/2026 9:20 AM, Ian Rogers wrote:
> On Wed, Oct 29, 2025 at 3:25 AM Dapeng Mi <dapeng1.mi@linux.intel.com> wrote:
>> Different from legacy PEBS, arch-PEBS provides per-counter PEBS data
>> configuration by programming MSR IA32_PMC_GPx/FXx_CFG_C MSRs.
>>
>> This patch obtains the PEBS data configuration from the event attribute,
>> then writes the PEBS data configuration to MSR IA32_PMC_GPx/FXx_CFG_C and
>> enables the corresponding PEBS groups.
>>
>> Please notice this patch only enables XMM SIMD regs sampling for
>> arch-PEBS, the other SIMD regs (OPMASK/YMM/ZMM) sampling on arch-PEBS
>> would be supported after PMI based SIMD regs (OPMASK/YMM/ZMM) sampling
>> is supported.
>>
>> Co-developed-by: Kan Liang <kan.liang@linux.intel.com>
>> Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
>> Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
>> ---
>>  arch/x86/events/intel/core.c     | 136 ++++++++++++++++++++++++++++++-
>>  arch/x86/events/intel/ds.c       |  17 ++++
>>  arch/x86/events/perf_event.h     |   4 +
>>  arch/x86/include/asm/intel_ds.h  |   7 ++
>>  arch/x86/include/asm/msr-index.h |   8 ++
>>  5 files changed, 171 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
>> index 40ccfd80d554..75cba28b86d5 100644
>> --- a/arch/x86/events/intel/core.c
>> +++ b/arch/x86/events/intel/core.c
>> @@ -2563,6 +2563,45 @@ static void intel_pmu_disable_fixed(struct perf_event *event)
>>         cpuc->fixed_ctrl_val &= ~mask;
>>  }
>>
>> +static inline void __intel_pmu_update_event_ext(int idx, u64 ext)
>> +{
>> +       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>> +       u32 msr;
>> +
>> +       if (idx < INTEL_PMC_IDX_FIXED) {
>> +               msr = MSR_IA32_PMC_V6_GP0_CFG_C +
>> +                     x86_pmu.addr_offset(idx, false);
>> +       } else {
>> +               msr = MSR_IA32_PMC_V6_FX0_CFG_C +
>> +                     x86_pmu.addr_offset(idx - INTEL_PMC_IDX_FIXED, false);
>> +       }
>> +
>> +       cpuc->cfg_c_val[idx] = ext;
>> +       wrmsrq(msr, ext);
>> +}
>> +
>> +static void intel_pmu_disable_event_ext(struct perf_event *event)
>> +{
>> +       if (!x86_pmu.arch_pebs)
>> +               return;
>> +
>> +       /*
>> +        * Only clear CFG_C MSR for PEBS counter group events,
>> +        * it avoids the HW counter's value to be added into
>> +        * other PEBS records incorrectly after PEBS counter
>> +        * group events are disabled.
>> +        *
>> +        * For other events, it's unnecessary to clear CFG_C MSRs
>> +        * since CFG_C doesn't take effect if counter is in
>> +        * disabled state. That helps to reduce the WRMSR overhead
>> +        * in context switches.
>> +        */
>> +       if (!is_pebs_counter_event_group(event))
>> +               return;
>> +
>> +       __intel_pmu_update_event_ext(event->hw.idx, 0);
>> +}
>> +
>>  static void intel_pmu_disable_event(struct perf_event *event)
>>  {
>>         struct hw_perf_event *hwc = &event->hw;
>> @@ -2571,9 +2610,12 @@ static void intel_pmu_disable_event(struct perf_event *event)
>>         switch (idx) {
>>         case 0 ... INTEL_PMC_IDX_FIXED - 1:
>>                 intel_clear_masks(event, idx);
>> +               intel_pmu_disable_event_ext(event);
>>                 x86_pmu_disable_event(event);
>>                 break;
>>         case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
>> +               intel_pmu_disable_event_ext(event);
>> +               fallthrough;
>>         case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
>>                 intel_pmu_disable_fixed(event);
>>                 break;
>> @@ -2940,6 +2982,66 @@ static void intel_pmu_enable_acr(struct perf_event *event)
>>
>>  DEFINE_STATIC_CALL_NULL(intel_pmu_enable_acr_event, intel_pmu_enable_acr);
>>
>> +static void intel_pmu_enable_event_ext(struct perf_event *event)
>> +{
>> +       struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>> +       struct hw_perf_event *hwc = &event->hw;
>> +       union arch_pebs_index old, new;
>> +       struct arch_pebs_cap cap;
>> +       u64 ext = 0;
>> +
>> +       if (!x86_pmu.arch_pebs)
>> +               return;
>> +
>> +       cap = hybrid(cpuc->pmu, arch_pebs_cap);
>> +
>> +       if (event->attr.precise_ip) {
>> +               u64 pebs_data_cfg = intel_get_arch_pebs_data_config(event);
>> +
>> +               ext |= ARCH_PEBS_EN;
>> +               if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD)
>> +                       ext |= (-hwc->sample_period) & ARCH_PEBS_RELOAD;
> Nit: Perhaps there should be a warning if "hwc->sample_period >
> ARCH_PEBS_RELOAD"?

Hmm, strictly speaking, we should not check whether hwc->sample_period is
larger than ARCH_PEBS_RELOAD, since hwc->sample_period is allowed to be
larger than ARCH_PEBS_RELOAD as long as it does not exceed
x86_pmu.max_period (which is "x86_pmu.cntval_mask >> 1" on modern Intel
platforms). But we do indeed need a sample_period check for arch-PEBS,
just like the ACR code below does.

```

        /* The reload value cannot exceed the max period */
        if (event->attr.sample_period > x86_pmu.max_period)
            return -EINVAL;

```

https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/arch/x86/events/intel/core.c?h=v7.0-rc2#n4789

I would add a patch to do this check. Thanks.


>
> Thanks,
> Ian
>
>> +
>> +               if (pebs_data_cfg && cap.caps) {
>> +                       if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
>> +                               ext |= ARCH_PEBS_AUX & cap.caps;
>> +
>> +                       if (pebs_data_cfg & PEBS_DATACFG_GP)
>> +                               ext |= ARCH_PEBS_GPR & cap.caps;
>> +
>> +                       if (pebs_data_cfg & PEBS_DATACFG_XMMS)
>> +                               ext |= ARCH_PEBS_VECR_XMM & cap.caps;
>> +
>> +                       if (pebs_data_cfg & PEBS_DATACFG_LBRS)
>> +                               ext |= ARCH_PEBS_LBR & cap.caps;
>> +               }
>> +
>> +               if (cpuc->n_pebs == cpuc->n_large_pebs)
>> +                       new.thresh = ARCH_PEBS_THRESH_MULTI;
>> +               else
>> +                       new.thresh = ARCH_PEBS_THRESH_SINGLE;
>> +
>> +               rdmsrq(MSR_IA32_PEBS_INDEX, old.whole);
>> +               if (new.thresh != old.thresh || !old.en) {
>> +                       if (old.thresh == ARCH_PEBS_THRESH_MULTI && old.wr > 0) {
>> +                               /*
>> +                                * Large PEBS was enabled.
>> +                                * Drain PEBS buffer before applying the single PEBS.
>> +                                */
>> +                               intel_pmu_drain_pebs_buffer();
>> +                       } else {
>> +                               new.wr = 0;
>> +                               new.full = 0;
>> +                               new.en = 1;
>> +                               wrmsrq(MSR_IA32_PEBS_INDEX, new.whole);
>> +                       }
>> +               }
>> +       }
>> +
>> +       if (cpuc->cfg_c_val[hwc->idx] != ext)
>> +               __intel_pmu_update_event_ext(hwc->idx, ext);
>> +}
>> +
>>  static void intel_pmu_enable_event(struct perf_event *event)
>>  {
>>         u64 enable_mask = ARCH_PERFMON_EVENTSEL_ENABLE;
>> @@ -2955,10 +3057,12 @@ static void intel_pmu_enable_event(struct perf_event *event)
>>                         enable_mask |= ARCH_PERFMON_EVENTSEL_BR_CNTR;
>>                 intel_set_masks(event, idx);
>>                 static_call_cond(intel_pmu_enable_acr_event)(event);
>> +               intel_pmu_enable_event_ext(event);
>>                 __x86_pmu_enable_event(hwc, enable_mask);
>>                 break;
>>         case INTEL_PMC_IDX_FIXED ... INTEL_PMC_IDX_FIXED_BTS - 1:
>>                 static_call_cond(intel_pmu_enable_acr_event)(event);
>> +               intel_pmu_enable_event_ext(event);
>>                 fallthrough;
>>         case INTEL_PMC_IDX_METRIC_BASE ... INTEL_PMC_IDX_METRIC_END:
>>                 intel_pmu_enable_fixed(event);
>> @@ -5301,6 +5405,30 @@ static inline bool intel_pmu_broken_perf_cap(void)
>>         return false;
>>  }
>>
>> +static inline void __intel_update_pmu_caps(struct pmu *pmu)
>> +{
>> +       struct pmu *dest_pmu = pmu ? pmu : x86_get_pmu(smp_processor_id());
>> +
>> +       if (hybrid(pmu, arch_pebs_cap).caps & ARCH_PEBS_VECR_XMM)
>> +               dest_pmu->capabilities |= PERF_PMU_CAP_EXTENDED_REGS;
>> +}
>> +
>> +static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
>> +{
>> +       u64 caps = hybrid(pmu, arch_pebs_cap).caps;
>> +
>> +       x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
>> +       if (caps & ARCH_PEBS_LBR)
>> +               x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
>> +
>> +       if (!(caps & ARCH_PEBS_AUX))
>> +               x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
>> +       if (!(caps & ARCH_PEBS_GPR)) {
>> +               x86_pmu.large_pebs_flags &=
>> +                       ~(PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER);
>> +       }
>> +}
>> +
>>  #define counter_mask(_gp, _fixed) ((_gp) | ((u64)(_fixed) << INTEL_PMC_IDX_FIXED))
>>
>>  static void update_pmu_cap(struct pmu *pmu)
>> @@ -5349,8 +5477,12 @@ static void update_pmu_cap(struct pmu *pmu)
>>                 hybrid(pmu, arch_pebs_cap).counters = pebs_mask;
>>                 hybrid(pmu, arch_pebs_cap).pdists = pdists_mask;
>>
>> -               if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask))
>> +               if (WARN_ON((pebs_mask | pdists_mask) & ~cntrs_mask)) {
>>                         x86_pmu.arch_pebs = 0;
>> +               } else {
>> +                       __intel_update_pmu_caps(pmu);
>> +                       __intel_update_large_pebs_flags(pmu);
>> +               }
>>         } else {
>>                 WARN_ON(x86_pmu.arch_pebs == 1);
>>                 x86_pmu.arch_pebs = 0;
>> @@ -5514,6 +5646,8 @@ static void intel_pmu_cpu_starting(int cpu)
>>                 }
>>         }
>>
>> +       __intel_update_pmu_caps(cpuc->pmu);
>> +
>>         if (!cpuc->shared_regs)
>>                 return;
>>
>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>> index 1179980f795b..c66e9b562de3 100644
>> --- a/arch/x86/events/intel/ds.c
>> +++ b/arch/x86/events/intel/ds.c
>> @@ -1528,6 +1528,18 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
>>         }
>>  }
>>
>> +u64 intel_get_arch_pebs_data_config(struct perf_event *event)
>> +{
>> +       u64 pebs_data_cfg = 0;
>> +
>> +       if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
>> +               return 0;
>> +
>> +       pebs_data_cfg |= pebs_update_adaptive_cfg(event);
>> +
>> +       return pebs_data_cfg;
>> +}
>> +
>>  void intel_pmu_pebs_add(struct perf_event *event)
>>  {
>>         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>> @@ -2947,6 +2959,11 @@ static void intel_pmu_drain_arch_pebs(struct pt_regs *iregs,
>>
>>         index.wr = 0;
>>         index.full = 0;
>> +       index.en = 1;
>> +       if (cpuc->n_pebs == cpuc->n_large_pebs)
>> +               index.thresh = ARCH_PEBS_THRESH_MULTI;
>> +       else
>> +               index.thresh = ARCH_PEBS_THRESH_SINGLE;
>>         wrmsrq(MSR_IA32_PEBS_INDEX, index.whole);
>>
>>         mask = hybrid(cpuc->pmu, arch_pebs_cap).counters & cpuc->pebs_enabled;
>> diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
>> index 13f411bca6bc..3161ec0a3416 100644
>> --- a/arch/x86/events/perf_event.h
>> +++ b/arch/x86/events/perf_event.h
>> @@ -304,6 +304,8 @@ struct cpu_hw_events {
>>         /* Intel ACR configuration */
>>         u64                     acr_cfg_b[X86_PMC_IDX_MAX];
>>         u64                     acr_cfg_c[X86_PMC_IDX_MAX];
>> +       /* Cached CFG_C values */
>> +       u64                     cfg_c_val[X86_PMC_IDX_MAX];
>>
>>         /*
>>          * Intel LBR bits
>> @@ -1782,6 +1784,8 @@ void intel_pmu_pebs_data_source_cmt(void);
>>
>>  void intel_pmu_pebs_data_source_lnl(void);
>>
>> +u64 intel_get_arch_pebs_data_config(struct perf_event *event);
>> +
>>  int intel_pmu_setup_lbr_filter(struct perf_event *event);
>>
>>  void intel_pt_interrupt(void);
>> diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h
>> index 023c2883f9f3..695f87efbeb8 100644
>> --- a/arch/x86/include/asm/intel_ds.h
>> +++ b/arch/x86/include/asm/intel_ds.h
>> @@ -7,6 +7,13 @@
>>  #define PEBS_BUFFER_SHIFT      4
>>  #define PEBS_BUFFER_SIZE       (PAGE_SIZE << PEBS_BUFFER_SHIFT)
>>
>> +/*
>> + * The largest PEBS record could consume a page, ensure
>> + * a record at least can be written after triggering PMI.
>> + */
>> +#define ARCH_PEBS_THRESH_MULTI ((PEBS_BUFFER_SIZE - PAGE_SIZE) >> PEBS_BUFFER_SHIFT)
>> +#define ARCH_PEBS_THRESH_SINGLE        1
>> +
>>  /* The maximal number of PEBS events: */
>>  #define MAX_PEBS_EVENTS_FMT4   8
>>  #define MAX_PEBS_EVENTS                32
>> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
>> index fc7a4e7c718d..f1ef9ac38bfb 100644
>> --- a/arch/x86/include/asm/msr-index.h
>> +++ b/arch/x86/include/asm/msr-index.h
>> @@ -333,6 +333,14 @@
>>  #define ARCH_PEBS_OFFSET_MASK          0x7fffff
>>  #define ARCH_PEBS_INDEX_WR_SHIFT       4
>>
>> +#define ARCH_PEBS_RELOAD               0xffffffff
>> +#define ARCH_PEBS_LBR_SHIFT            40
>> +#define ARCH_PEBS_LBR                  (0x3ull << ARCH_PEBS_LBR_SHIFT)
>> +#define ARCH_PEBS_VECR_XMM             BIT_ULL(49)
>> +#define ARCH_PEBS_GPR                  BIT_ULL(61)
>> +#define ARCH_PEBS_AUX                  BIT_ULL(62)
>> +#define ARCH_PEBS_EN                   BIT_ULL(63)
>> +
>>  #define MSR_IA32_RTIT_CTL              0x00000570
>>  #define RTIT_CTL_TRACEEN               BIT(0)
>>  #define RTIT_CTL_CYCLEACC              BIT(1)
>> --
>> 2.34.1
>>

  reply	other threads:[~2026-03-06  2:17 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-29 10:21 [Patch v9 00/12] arch-PEBS enabling for Intel platforms Dapeng Mi
2025-10-29 10:21 ` [Patch v9 01/12] perf/x86: Remove redundant is_x86_event() prototype Dapeng Mi
2025-10-29 10:21 ` [Patch v9 02/12] perf/x86: Fix NULL event access and potential PEBS record loss Dapeng Mi
2025-11-06 14:19   ` Peter Zijlstra
2025-10-29 10:21 ` [Patch v9 03/12] perf/x86/intel: Replace x86_pmu.drain_pebs calling with static call Dapeng Mi
2025-10-29 10:21 ` [Patch v9 04/12] perf/x86/intel: Correct large PEBS flag check Dapeng Mi
2025-10-29 10:21 ` [Patch v9 05/12] perf/x86/intel: Initialize architectural PEBS Dapeng Mi
2026-03-05  0:50   ` Ian Rogers
2026-03-06  1:38     ` Mi, Dapeng
2025-10-29 10:21 ` [Patch v9 06/12] perf/x86/intel/ds: Factor out PEBS record processing code to functions Dapeng Mi
2025-10-29 10:21 ` [Patch v9 07/12] perf/x86/intel/ds: Factor out PEBS group " Dapeng Mi
2025-10-29 10:21 ` [Patch v9 08/12] perf/x86/intel: Process arch-PEBS records or record fragments Dapeng Mi
2026-03-03  0:20   ` Chun-Tse Shao
2026-03-06  1:20     ` Mi, Dapeng
2025-10-29 10:21 ` [Patch v9 09/12] perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR Dapeng Mi
2025-10-29 10:21 ` [Patch v9 10/12] perf/x86/intel: Update dyn_constranit base on PEBS event precise level Dapeng Mi
2025-11-06 14:52   ` Peter Zijlstra
2025-11-07  6:11     ` Mi, Dapeng
2025-11-07  8:28       ` Peter Zijlstra
2025-11-07  8:36         ` Mi, Dapeng
2025-11-07 13:05       ` Peter Zijlstra
2025-11-10  0:23         ` Mi, Dapeng
2025-11-10  9:03           ` Peter Zijlstra
2025-11-10  9:15             ` Mi, Dapeng
2025-11-11  5:41               ` Mi, Dapeng
2025-11-11 11:37                 ` Peter Zijlstra
2025-11-12  0:16                   ` Mi, Dapeng
2025-10-29 10:21 ` [Patch v9 11/12] perf/x86/intel: Setup PEBS data configuration and enable legacy groups Dapeng Mi
2026-03-05  1:20   ` Ian Rogers
2026-03-06  2:17     ` Mi, Dapeng [this message]
2025-10-29 10:21 ` [Patch v9 12/12] perf/x86/intel: Add counter group support for arch-PEBS Dapeng Mi
2026-03-09 22:59   ` Ian Rogers
2026-03-10  2:06     ` Mi, Dapeng
2026-03-10  4:36       ` Ian Rogers

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=59e94944-78be-4d76-9445-6095093e0742@linux.intel.com \
    --to=dapeng1.mi@linux.intel.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=dapeng1.mi@intel.com \
    --cc=eranian@google.com \
    --cc=irogers@google.com \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=thomas.falcon@intel.com \
    --cc=xudong.hao@intel.com \
    --cc=zide.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox