From: Dapeng Mi <dapeng1.mi@linux.intel.com>
To: Peter Zijlstra <peterz@infradead.org>,
Ingo Molnar <mingo@redhat.com>,
Arnaldo Carvalho de Melo <acme@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Ian Rogers <irogers@google.com>,
Adrian Hunter <adrian.hunter@intel.com>,
Alexander Shishkin <alexander.shishkin@linux.intel.com>,
Kan Liang <kan.liang@linux.intel.com>,
Andi Kleen <ak@linux.intel.com>,
Eranian Stephane <eranian@google.com>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
Dapeng Mi <dapeng1.mi@intel.com>,
Dapeng Mi <dapeng1.mi@linux.intel.com>
Subject: [Patch v2 16/24] perf/x86/intel: Add counter group support for arch-PEBS
Date: Tue, 18 Feb 2025 15:28:10 +0000
Message-ID: <20250218152818.158614-17-dapeng1.mi@linux.intel.com>
In-Reply-To: <20250218152818.158614-1-dapeng1.mi@linux.intel.com>
Based on the previous adaptive PEBS counter snapshot support, add counter
group support for architectural PEBS. Since arch-PEBS shares the same
counter group layout as adaptive PEBS, directly reuse the
__setup_pebs_counter_group() helper to process arch-PEBS counter groups.
Signed-off-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
---
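Note for reviewers: below is a minimal user-space sketch of how the
counter-group fragment added by this patch is sized. The header layout
mirrors struct arch_pebs_cntr_header and the sizing rule mirrors the
setup_arch_pebs_sample_data() hunk in the diff below; the concrete value
of INTEL_CNTR_METRICS and the hweight32() reimplementation are
placeholders for illustration only, not kernel code.

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the header added to asm/perf_event.h in this patch. */
    struct arch_pebs_cntr_header {
            uint32_t cntr;      /* bitmap of captured GP counters */
            uint32_t fixed;     /* bitmap of captured fixed counters */
            uint32_t metrics;   /* INTEL_CNTR_METRICS when metrics follow */
            uint32_t reserved;
    };

    #define INTEL_CNTR_METRICS 0x3  /* assumed value, for illustration */

    /* Stand-in for the kernel's hweight32() helper. */
    static unsigned int hweight32(uint32_t v)
    {
            return (unsigned int)__builtin_popcount(v);
    }

    /* Number of u64 counter values following the header in the record. */
    static unsigned int cntr_payload_qwords(const struct arch_pebs_cntr_header *h)
    {
            unsigned int nr = hweight32(h->cntr) + hweight32(h->fixed);

            if (h->metrics == INTEL_CNTR_METRICS)
                    nr += 2;    /* two extra qwords of metrics data */
            return nr;
    }

    int main(void)
    {
            struct arch_pebs_cntr_header h = {
                    .cntr = 0x5, .fixed = 0x1, .metrics = INTEL_CNTR_METRICS,
            };

            /* 2 GP + 1 fixed + 2 metrics qwords -> 5 qwords of payload */
            printf("payload: %u qwords\n", cntr_payload_qwords(&h));
            return 0;
    }

This is the same arithmetic the ds.c hunk uses to advance next_record
past the counter-group fragment before parsing any following fragments.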
arch/x86/events/intel/core.c | 38 ++++++++++++++++++++++++++++---
arch/x86/events/intel/ds.c | 31 +++++++++++++++++++++----
arch/x86/events/perf_event.h | 2 ++
arch/x86/include/asm/msr-index.h | 6 +++++
arch/x86/include/asm/perf_event.h | 13 ++++++++---
5 files changed, 80 insertions(+), 10 deletions(-)
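Similarly, the enable-bit selection performed in the
intel_pmu_enable_event_ext() hunk below can be read in isolation as the
following standalone sketch. The constants are copied from the
msr-index.h and perf_event.h hunks of this patch; the free-standing
function is an illustrative rewrite, not the kernel implementation.

    #include <stdint.h>

    #define BIT_ULL(n)              (1ULL << (n))

    #define PEBS_DATACFG_METRICS    BIT_ULL(5)
    #define PEBS_DATACFG_CNTR_SHIFT 32
    #define PEBS_DATACFG_CNTR_MASK  0xffffULL   /* GENMASK_ULL(15, 0) */
    #define PEBS_DATACFG_FIX_SHIFT  48
    #define PEBS_DATACFG_FIX_MASK   0xffULL     /* GENMASK_ULL(7, 0) */

    #define ARCH_PEBS_CNTR_ALLOW    BIT_ULL(35)
    #define ARCH_PEBS_CNTR_GP       BIT_ULL(36)
    #define ARCH_PEBS_CNTR_FIXED    BIT_ULL(37)
    #define ARCH_PEBS_CNTR_METRICS  BIT_ULL(38)

    /*
     * Request each counter-group data source in the extension control
     * value only when the PEBS data configuration asks for it AND the
     * hardware capability bit (caps) advertises it.
     */
    static uint64_t cntr_ext_bits(uint64_t pebs_data_cfg, uint64_t caps,
                                  int is_cntr_group)
    {
            uint64_t ext = 0;

            if (pebs_data_cfg &
                (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT))
                    ext |= ARCH_PEBS_CNTR_GP & caps;

            if (pebs_data_cfg &
                (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT))
                    ext |= ARCH_PEBS_CNTR_FIXED & caps;

            if (pebs_data_cfg & PEBS_DATACFG_METRICS)
                    ext |= ARCH_PEBS_CNTR_METRICS & caps;

            /* Counter-group events must additionally be allowed. */
            if (is_cntr_group)
                    ext |= ARCH_PEBS_CNTR_ALLOW;

            return ext;
    }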
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index b80a66751136..f21d9f283445 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2965,6 +2965,17 @@ static void intel_pmu_enable_event_ext(struct perf_event *event)
if (pebs_data_cfg & PEBS_DATACFG_LBRS)
ext |= ARCH_PEBS_LBR & cap.caps;
+
+ if (pebs_data_cfg &
+ (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT))
+ ext |= ARCH_PEBS_CNTR_GP & cap.caps;
+
+ if (pebs_data_cfg &
+ (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT))
+ ext |= ARCH_PEBS_CNTR_FIXED & cap.caps;
+
+ if (pebs_data_cfg & PEBS_DATACFG_METRICS)
+ ext |= ARCH_PEBS_CNTR_METRICS & cap.caps;
}
if (cpuc->n_pebs == cpuc->n_large_pebs)
@@ -2990,6 +3001,9 @@ static void intel_pmu_enable_event_ext(struct perf_event *event)
}
}
+ if (is_pebs_counter_event_group(event))
+ ext |= ARCH_PEBS_CNTR_ALLOW;
+
if (cpuc->cfg_c_val[hwc->idx] != ext)
__intel_pmu_update_event_ext(hwc->idx, ext);
}
@@ -4120,6 +4134,20 @@ static inline bool intel_pmu_has_cap(struct perf_event *event, int idx)
return test_bit(idx, (unsigned long *)&intel_cap->capabilities);
}
+static inline bool intel_pmu_has_pebs_counter_group(struct pmu *pmu)
+{
+ u64 caps;
+
+ if (x86_pmu.intel_cap.pebs_format >= 6 && x86_pmu.intel_cap.pebs_baseline)
+ return true;
+
+ caps = hybrid(pmu, arch_pebs_cap).caps;
+ if (x86_pmu.arch_pebs && (caps & ARCH_PEBS_CNTR_MASK))
+ return true;
+
+ return false;
+}
+
static int intel_pmu_hw_config(struct perf_event *event)
{
int ret = x86_pmu_hw_config(event);
@@ -4242,8 +4270,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
}
if ((event->attr.sample_type & PERF_SAMPLE_READ) &&
- (x86_pmu.intel_cap.pebs_format >= 6) &&
- x86_pmu.intel_cap.pebs_baseline &&
+ intel_pmu_has_pebs_counter_group(event->pmu) &&
is_sampling_event(event) &&
event->attr.precise_ip)
event->group_leader->hw.flags |= PERF_X86_EVENT_PEBS_CNTR;
@@ -5097,6 +5124,8 @@ static inline void __intel_update_large_pebs_flags(struct pmu *pmu)
x86_pmu.large_pebs_flags |= PERF_SAMPLE_TIME;
if (caps & ARCH_PEBS_LBR)
x86_pmu.large_pebs_flags |= PERF_SAMPLE_BRANCH_STACK;
+ if (caps & ARCH_PEBS_CNTR_MASK)
+ x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
if (!(caps & ARCH_PEBS_AUX))
x86_pmu.large_pebs_flags &= ~PERF_SAMPLE_DATA_SRC;
@@ -6759,8 +6788,11 @@ __init int intel_pmu_init(void)
* Many features on and after V6 require dynamic constraint,
* e.g., Arch PEBS, ACR.
*/
- if (version >= 6)
+ if (version >= 6) {
x86_pmu.flags |= PMU_FL_DYN_CONSTRAINT;
+ x86_pmu.late_setup = intel_pmu_late_setup;
+ }
+
/*
* Install the hw-cache-events table:
*/
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index cad653706431..4b01beee15f4 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1383,7 +1383,7 @@ static void __intel_pmu_pebs_update_cfg(struct perf_event *event,
}
-static void intel_pmu_late_setup(void)
+void intel_pmu_late_setup(void)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
@@ -1494,13 +1494,20 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc,
u64 intel_get_arch_pebs_data_config(struct perf_event *event)
{
+ struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
u64 pebs_data_cfg = 0;
+ u64 cntr_mask;
if (WARN_ON(event->hw.idx < 0 || event->hw.idx >= X86_PMC_IDX_MAX))
return 0;
pebs_data_cfg |= pebs_update_adaptive_cfg(event);
+ cntr_mask = (PEBS_DATACFG_CNTR_MASK << PEBS_DATACFG_CNTR_SHIFT) |
+ (PEBS_DATACFG_FIX_MASK << PEBS_DATACFG_FIX_SHIFT) |
+ PEBS_DATACFG_CNTR | PEBS_DATACFG_METRICS;
+ pebs_data_cfg |= cpuc->pebs_data_cfg & cntr_mask;
+
return pebs_data_cfg;
}
@@ -2411,6 +2418,24 @@ static void setup_arch_pebs_sample_data(struct perf_event *event,
}
}
+ if (header->cntr) {
+ struct arch_pebs_cntr_header *cntr = next_record;
+ unsigned int nr;
+
+ next_record += sizeof(struct arch_pebs_cntr_header);
+
+ if (is_pebs_counter_event_group(event)) {
+ __setup_pebs_counter_group(cpuc, event,
+ (struct pebs_cntr_header *)cntr, next_record);
+ data->sample_flags |= PERF_SAMPLE_READ;
+ }
+
+ nr = hweight32(cntr->cntr) + hweight32(cntr->fixed);
+ if (cntr->metrics == INTEL_CNTR_METRICS)
+ nr += 2;
+ next_record += nr * sizeof(u64);
+ }
+
/* Parse followed fragments if there are. */
if (arch_pebs_record_continued(header)) {
at = at + header->size;
@@ -3040,10 +3065,8 @@ static void __init intel_ds_pebs_init(void)
break;
case 6:
- if (x86_pmu.intel_cap.pebs_baseline) {
+ if (x86_pmu.intel_cap.pebs_baseline)
x86_pmu.large_pebs_flags |= PERF_SAMPLE_READ;
- x86_pmu.late_setup = intel_pmu_late_setup;
- }
fallthrough;
case 5:
x86_pmu.pebs_ept = 1;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 69c4341f5753..cba7b928fdb2 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -1697,6 +1697,8 @@ void intel_pmu_store_pebs_lbrs(struct lbr_entry *lbr);
void intel_pebs_init(void);
+void intel_pmu_late_setup(void);
+
void intel_pmu_lbr_save_brstack(struct perf_sample_data *data,
struct cpu_hw_events *cpuc,
struct perf_event *event);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1e67cb467946..0ca84deb2396 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -319,12 +319,18 @@
#define ARCH_PEBS_INDEX_WR_SHIFT 4
#define ARCH_PEBS_RELOAD 0xffffffff
+#define ARCH_PEBS_CNTR_ALLOW BIT_ULL(35)
+#define ARCH_PEBS_CNTR_GP BIT_ULL(36)
+#define ARCH_PEBS_CNTR_FIXED BIT_ULL(37)
+#define ARCH_PEBS_CNTR_METRICS BIT_ULL(38)
#define ARCH_PEBS_LBR_SHIFT 40
#define ARCH_PEBS_LBR (0x3ull << ARCH_PEBS_LBR_SHIFT)
#define ARCH_PEBS_VECR_XMM BIT_ULL(49)
#define ARCH_PEBS_GPR BIT_ULL(61)
#define ARCH_PEBS_AUX BIT_ULL(62)
#define ARCH_PEBS_EN BIT_ULL(63)
+#define ARCH_PEBS_CNTR_MASK (ARCH_PEBS_CNTR_GP | ARCH_PEBS_CNTR_FIXED | \
+ ARCH_PEBS_CNTR_METRICS)
#define MSR_IA32_RTIT_CTL 0x00000570
#define RTIT_CTL_TRACEEN BIT(0)
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index d5285bb4b333..461f0e357c9e 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -137,16 +137,16 @@
#define ARCH_PERFMON_EVENTS_COUNT 7
#define PEBS_DATACFG_MEMINFO BIT_ULL(0)
-#define PEBS_DATACFG_GP BIT_ULL(1)
+#define PEBS_DATACFG_GP BIT_ULL(1)
#define PEBS_DATACFG_XMMS BIT_ULL(2)
#define PEBS_DATACFG_LBRS BIT_ULL(3)
-#define PEBS_DATACFG_LBR_SHIFT 24
#define PEBS_DATACFG_CNTR BIT_ULL(4)
+#define PEBS_DATACFG_METRICS BIT_ULL(5)
+#define PEBS_DATACFG_LBR_SHIFT 24
#define PEBS_DATACFG_CNTR_SHIFT 32
#define PEBS_DATACFG_CNTR_MASK GENMASK_ULL(15, 0)
#define PEBS_DATACFG_FIX_SHIFT 48
#define PEBS_DATACFG_FIX_MASK GENMASK_ULL(7, 0)
-#define PEBS_DATACFG_METRICS BIT_ULL(5)
/* Steal the highest bit of pebs_data_cfg for SW usage */
#define PEBS_UPDATE_DS_SW BIT_ULL(63)
@@ -602,6 +602,13 @@ struct arch_pebs_lbr_header {
u64 ler_info;
};
+struct arch_pebs_cntr_header {
+ u32 cntr;
+ u32 fixed;
+ u32 metrics;
+ u32 reserved;
+};
+
/*
* AMD Extended Performance Monitoring and Debug cpuid feature detection
*/
--
2.40.1
Thread overview: 58+ messages
2025-02-18 15:27 [Patch v2 00/24] Arch-PEBS and PMU supports for Clearwater Forest and Panther Lake Dapeng Mi
2025-02-18 15:27 ` [Patch v2 01/24] perf/x86: Add dynamic constraint Dapeng Mi
2025-02-18 15:27 ` [Patch v2 02/24] perf/x86/intel: Add Panther Lake support Dapeng Mi
2025-02-18 15:27 ` [Patch v2 03/24] perf/x86/intel: Add PMU support for Clearwater Forest Dapeng Mi
2025-02-18 15:27 ` [Patch v2 04/24] perf/x86/intel: Parse CPUID archPerfmonExt leaves for non-hybrid CPUs Dapeng Mi
2025-02-18 15:27 ` [Patch v2 05/24] perf/x86/intel: Decouple BTS initialization from PEBS initialization Dapeng Mi
2025-02-18 15:28 ` [Patch v2 06/24] perf/x86/intel: Rename x86_pmu.pebs to x86_pmu.ds_pebs Dapeng Mi
2025-02-18 15:28 ` [Patch v2 07/24] perf/x86/intel: Introduce pairs of PEBS static calls Dapeng Mi
2025-02-18 15:28 ` [Patch v2 08/24] perf/x86/intel: Initialize architectural PEBS Dapeng Mi
2025-02-18 15:28 ` [Patch v2 09/24] perf/x86/intel/ds: Factor out common PEBS processing code to functions Dapeng Mi
2025-02-18 15:28 ` [Patch v2 10/24] perf/x86/intel: Process arch-PEBS records or record fragments Dapeng Mi
2025-02-25 10:39 ` Peter Zijlstra
2025-02-25 11:00 ` Peter Zijlstra
2025-02-26 5:20 ` Mi, Dapeng
2025-02-26 9:35 ` Peter Zijlstra
2025-02-26 15:45 ` Liang, Kan
2025-02-27 2:04 ` Mi, Dapeng
2025-02-25 20:42 ` Andi Kleen
2025-02-26 2:54 ` Mi, Dapeng
2025-02-18 15:28 ` [Patch v2 11/24] perf/x86/intel: Factor out common functions to process PEBS groups Dapeng Mi
2025-02-25 11:02 ` Peter Zijlstra
2025-02-26 5:24 ` Mi, Dapeng
2025-02-18 15:28 ` [Patch v2 12/24] perf/x86/intel: Allocate arch-PEBS buffer and initialize PEBS_BASE MSR Dapeng Mi
2025-02-25 11:18 ` Peter Zijlstra
2025-02-26 5:48 ` Mi, Dapeng
2025-02-26 9:46 ` Peter Zijlstra
2025-02-27 2:05 ` Mi, Dapeng
2025-02-25 11:25 ` Peter Zijlstra
2025-02-26 6:19 ` Mi, Dapeng
2025-02-26 9:48 ` Peter Zijlstra
2025-02-27 2:09 ` Mi, Dapeng
2025-02-18 15:28 ` [Patch v2 13/24] perf/x86/intel: Update dyn_constranit base on PEBS event precise level Dapeng Mi
2025-02-27 14:06 ` Liang, Kan
2025-03-05 1:41 ` Mi, Dapeng
2025-02-18 15:28 ` [Patch v2 14/24] perf/x86/intel: Setup PEBS data configuration and enable legacy groups Dapeng Mi
2025-02-18 15:28 ` [Patch v2 15/24] perf/x86/intel: Add SSP register support for arch-PEBS Dapeng Mi
2025-02-25 11:52 ` Peter Zijlstra
2025-02-26 6:56 ` Mi, Dapeng
2025-02-25 11:54 ` Peter Zijlstra
2025-02-25 20:44 ` Andi Kleen
2025-02-27 6:29 ` Mi, Dapeng
2025-02-18 15:28 ` Dapeng Mi [this message]
2025-02-18 15:28 ` [Patch v2 17/24] perf/core: Support to capture higher width vector registers Dapeng Mi
2025-02-25 20:32 ` Peter Zijlstra
2025-02-26 7:55 ` Mi, Dapeng
2025-02-18 15:28 ` [Patch v2 18/24] perf/x86/intel: Support arch-PEBS vector registers group capturing Dapeng Mi
2025-02-25 15:32 ` Peter Zijlstra
2025-02-26 8:08 ` Mi, Dapeng
2025-02-27 6:40 ` Mi, Dapeng
2025-03-04 3:08 ` Mi, Dapeng
2025-03-04 16:26 ` Liang, Kan
2025-03-05 1:34 ` Mi, Dapeng
2025-02-18 15:28 ` [Patch v2 19/24] perf tools: Support to show SSP register Dapeng Mi
2025-02-18 15:28 ` [Patch v2 20/24] perf tools: Enhance arch__intr/user_reg_mask() helpers Dapeng Mi
2025-02-18 15:28 ` [Patch v2 21/24] perf tools: Enhance sample_regs_user/intr to capture more registers Dapeng Mi
2025-02-18 15:28 ` [Patch v2 22/24] perf tools: Support to capture more vector registers (x86/Intel) Dapeng Mi
2025-02-18 15:28 ` [Patch v2 23/24] perf tools/tests: Add vector registers PEBS sampling test Dapeng Mi
2025-02-18 15:28 ` [Patch v2 24/24] perf tools: Fix incorrect --user-regs comments Dapeng Mi