From: kan.liang@linux.intel.com
To: peterz@infradead.org, mingo@kernel.org, acme@kernel.org,
namhyung@kernel.org, irogers@google.com, adrian.hunter@intel.com,
alexander.shishkin@linux.intel.com, linux-kernel@vger.kernel.org
Cc: ak@linux.intel.com, eranian@google.com,
Kan Liang <kan.liang@linux.intel.com>
Subject: [RESEND PATCH 04/12] perf/x86/intel: Support new data source for Lunar Lake
Date: Tue, 18 Jun 2024 08:10:36 -0700 [thread overview]
Message-ID: <20240618151044.1318612-5-kan.liang@linux.intel.com> (raw)
In-Reply-To: <20240618151044.1318612-1-kan.liang@linux.intel.com>
From: Kan Liang <kan.liang@linux.intel.com>
A new PEBS data source format is introduced for the p-core of Lunar
Lake. The data source field is extended to 8 bits with new encodings.
A new layout is introduced into the union intel_x86_pebs_dse.
Introduce the lnl_latency_data() to parse the new format.
Enlarge the pebs_data_source[] accordingly to include new encodings.
Only the mem load and the mem store events can generate the data source.
Introduce INTEL_HYBRID_LDLAT_CONSTRAINT and
INTEL_HYBRID_STLAT_CONSTRAINT to mark them.
Add two new bits for the new cache-related data src, L2_MHB and MSC.
The L2_MHB is short for L2 Miss Handling Buffer, which is similar to
LFB (Line Fill Buffer), but to track the L2 Cache misses.
The MSC stands for the memory-side cache.
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
---
arch/x86/events/intel/core.c | 2 +
arch/x86/events/intel/ds.c | 88 ++++++++++++++++++++++++++++++++-
arch/x86/events/perf_event.h | 16 +++++-
include/uapi/linux/perf_event.h | 6 ++-
4 files changed, 107 insertions(+), 5 deletions(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 42e65fb6f2ff..60806f373226 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -6960,6 +6960,7 @@ __init int intel_pmu_init(void)
case INTEL_FAM6_ARROWLAKE:
intel_pmu_init_hybrid(hybrid_big_small);
+ x86_pmu.pebs_latency_data = lnl_latency_data;
x86_pmu.get_event_constraints = mtl_get_event_constraints;
x86_pmu.hw_config = adl_hw_config;
@@ -6977,6 +6978,7 @@ __init int intel_pmu_init(void)
pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
intel_pmu_init_skt(&pmu->pmu);
+ intel_pmu_pebs_data_source_lnl();
pr_cont("Lunarlake Hybrid events, ");
name = "lunarlake_hybrid";
break;
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 42a38222c044..abf2b1991bc0 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -63,6 +63,15 @@ union intel_x86_pebs_dse {
unsigned int mtl_fwd_blk:1;
unsigned int ld_reserved4:24;
};
+ struct {
+ unsigned int lnc_dse:8;
+ unsigned int ld_reserved5:2;
+ unsigned int lnc_stlb_miss:1;
+ unsigned int lnc_locked:1;
+ unsigned int lnc_data_blk:1;
+ unsigned int lnc_addr_blk:1;
+ unsigned int ld_reserved6:18;
+ };
};
@@ -77,7 +86,7 @@ union intel_x86_pebs_dse {
#define SNOOP_NONE_MISS (P(SNOOP, NONE) | P(SNOOP, MISS))
/* Version for Sandy Bridge and later */
-static u64 pebs_data_source[] = {
+static u64 pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA),/* 0x00:ukn L3 */
OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 local */
OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x02: LFB hit */
@@ -173,6 +182,40 @@ void __init intel_pmu_pebs_data_source_cmt(void)
__intel_pmu_pebs_data_source_cmt(pebs_data_source);
}
+/* Version for Lunar Lake p-core and later */
+static u64 lnc_pebs_data_source[PERF_PEBS_DATA_SOURCE_MAX] = {
+ P(OP, LOAD) | P(LVL, MISS) | LEVEL(L3) | P(SNOOP, NA), /* 0x00: ukn L3 */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x01: L1 hit */
+ OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x02: L1 hit */
+ OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x03: LFB/L1 Miss Handling Buffer hit */
+ 0, /* 0x04: Reserved */
+ OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x05: L2 Hit */
+ OP_LH | LEVEL(L2_MHB) | P(SNOOP, NONE), /* 0x06: L2 Miss Handling Buffer Hit */
+ 0, /* 0x07: Reserved */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, NONE), /* 0x08: L3 Hit */
+ 0, /* 0x09: Reserved */
+ 0, /* 0x0a: Reserved */
+ 0, /* 0x0b: Reserved */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD), /* 0x0c: L3 Hit Snoop Fwd */
+ OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0d: L3 Hit Snoop HitM */
+ 0, /* 0x0e: Reserved */
+ P(OP, LOAD) | P(LVL, MISS) | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM), /* 0x0f: L3 Miss Snoop HitM */
+ OP_LH | LEVEL(MSC) | P(SNOOP, NONE), /* 0x10: Memory-side Cache Hit */
+ OP_LH | P(LVL, LOC_RAM) | LEVEL(RAM) | P(SNOOP, NONE), /* 0x11: Local Memory Hit */
+};
+
+void __init intel_pmu_pebs_data_source_lnl(void)
+{
+ u64 *data_source;
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX].pebs_data_source;
+ memcpy(data_source, lnc_pebs_data_source, sizeof(lnc_pebs_data_source));
+
+ data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source;
+ memcpy(data_source, pebs_data_source, sizeof(pebs_data_source));
+ __intel_pmu_pebs_data_source_cmt(data_source);
+}
+
static u64 precise_store_data(u64 status)
{
union intel_x86_pebs_dse dse;
@@ -264,7 +307,7 @@ static u64 __adl_latency_data_small(struct perf_event *event, u64 status,
WARN_ON_ONCE(hybrid_pmu(event->pmu)->pmu_type == hybrid_big);
- dse &= PERF_PEBS_DATA_SOURCE_MASK;
+ dse &= PERF_PEBS_DATA_SOURCE_GRT_MASK;
val = hybrid_var(event->pmu, pebs_data_source)[dse];
pebs_set_tlb_lock(&val, tlb, lock);
@@ -300,6 +343,45 @@ u64 mtl_latency_data_small(struct perf_event *event, u64 status)
dse.mtl_fwd_blk);
}
+u64 lnl_latency_data(struct perf_event *event, u64 status)
+{
+ struct x86_hybrid_pmu *pmu = hybrid_pmu(event->pmu);
+ union intel_x86_pebs_dse dse;
+ union perf_mem_data_src src;
+ u64 val;
+
+ if (pmu->pmu_type == hybrid_small)
+ return mtl_latency_data_small(event, status);
+
+ dse.val = status;
+
+ /* LNC core latency data */
+ val = hybrid_var(event->pmu, pebs_data_source)[status & PERF_PEBS_DATA_SOURCE_MASK];
+ if (!val)
+ val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA);
+
+ if (dse.lnc_stlb_miss)
+ val |= P(TLB, MISS) | P(TLB, L2);
+ else
+ val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2);
+
+ if (dse.lnc_locked)
+ val |= P(LOCK, LOCKED);
+
+ if (dse.lnc_data_blk)
+ val |= P(BLK, DATA);
+ if (dse.lnc_addr_blk)
+ val |= P(BLK, ADDR);
+ if (!dse.lnc_data_blk && !dse.lnc_addr_blk)
+ val |= P(BLK, NA);
+
+ src.val = val;
+ if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW)
+ src.mem_op = P(OP, STORE);
+
+ return src.val;
+}
+
static u64 load_latency_data(struct perf_event *event, u64 status)
{
union intel_x86_pebs_dse dse;
@@ -1090,6 +1172,8 @@ struct event_constraint intel_lnc_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x100, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),
+ INTEL_HYBRID_LDLAT_CONSTRAINT(0x1cd, 0x3ff),
+ INTEL_HYBRID_STLAT_CONSTRAINT(0x2cd, 0x3),
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_LOADS */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf), /* MEM_INST_RETIRED.STLB_MISS_STORES */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf), /* MEM_INST_RETIRED.LOCK_LOADS */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index 92df40b8926c..66209bb2ba77 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -476,6 +476,14 @@ struct cpu_hw_events {
__EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID)
+#define INTEL_HYBRID_LDLAT_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_LD_HSW)
+
+#define INTEL_HYBRID_STLAT_CONSTRAINT(c, n) \
+ __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK|X86_ALL_EVENT_FLAGS, \
+ HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LAT_HYBRID|PERF_X86_EVENT_PEBS_ST_HSW)
+
/* Event constraint, but match on all event flags too. */
#define INTEL_FLAGS_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT|X86_ALL_EVENT_FLAGS)
@@ -655,8 +663,10 @@ enum {
x86_lbr_exclusive_max,
};
-#define PERF_PEBS_DATA_SOURCE_MAX 0x10
+#define PERF_PEBS_DATA_SOURCE_MAX 0x100
#define PERF_PEBS_DATA_SOURCE_MASK (PERF_PEBS_DATA_SOURCE_MAX - 1)
+#define PERF_PEBS_DATA_SOURCE_GRT_MAX 0x10
+#define PERF_PEBS_DATA_SOURCE_GRT_MASK (PERF_PEBS_DATA_SOURCE_GRT_MAX - 1)
enum hybrid_cpu_type {
HYBRID_INTEL_NONE,
@@ -1542,6 +1552,8 @@ u64 adl_latency_data_small(struct perf_event *event, u64 status);
u64 mtl_latency_data_small(struct perf_event *event, u64 status);
+u64 lnl_latency_data(struct perf_event *event, u64 status);
+
extern struct event_constraint intel_core2_pebs_event_constraints[];
extern struct event_constraint intel_atom_pebs_event_constraints[];
@@ -1663,6 +1675,8 @@ void intel_pmu_pebs_data_source_mtl(void);
void intel_pmu_pebs_data_source_cmt(void);
+void intel_pmu_pebs_data_source_lnl(void);
+
int intel_pmu_setup_lbr_filter(struct perf_event *event);
void intel_pt_interrupt(void);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 3a64499b0f5d..4842c36fdf80 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -1349,12 +1349,14 @@ union perf_mem_data_src {
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
-/* 5-0x7 available */
+#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */
+/* 0x7 available */
#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
+#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */
#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
--
2.35.1
next prev parent reply other threads:[~2024-06-18 15:12 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-06-18 15:10 [RESEND PATCH 00/12] Support Lunar Lake and Arrow Lake core PMU kan.liang
2024-06-18 15:10 ` [RESEND PATCH 01/12] perf/x86/intel: Support the PEBS event mask kan.liang
2024-06-20 7:02 ` Peter Zijlstra
2024-06-20 15:58 ` Liang, Kan
2024-06-21 14:19 ` Liang, Kan
2024-06-24 8:29 ` Peter Zijlstra
2024-06-24 8:21 ` Peter Zijlstra
2024-06-18 15:10 ` [RESEND PATCH 02/12] perf/x86: Support counter mask kan.liang
2024-06-20 7:06 ` Peter Zijlstra
2024-06-20 16:02 ` Liang, Kan
2024-06-18 15:10 ` [RESEND PATCH 03/12] perf/x86: Add Lunar Lake and Arrow Lake support kan.liang
2024-06-18 15:10 ` kan.liang [this message]
2024-06-20 7:34 ` [RESEND PATCH 04/12] perf/x86/intel: Support new data source for Lunar Lake Peter Zijlstra
2024-06-20 16:09 ` Liang, Kan
2024-06-18 15:10 ` [RESEND PATCH 05/12] perf/x86: Add config_mask to represent EVENTSEL bitmask kan.liang
2024-06-20 7:44 ` Peter Zijlstra
2024-06-20 16:16 ` Liang, Kan
2024-06-21 18:34 ` Liang, Kan
2024-06-24 8:28 ` Peter Zijlstra
2024-06-24 15:36 ` Liang, Kan
2024-06-24 8:26 ` Peter Zijlstra
2024-06-18 15:10 ` [RESEND PATCH 06/12] perf/x86/intel: Support PERFEVTSEL extension kan.liang
2024-06-18 15:10 ` [RESEND PATCH 07/12] perf/x86/intel: Support Perfmon MSRs aliasing kan.liang
2024-06-20 8:02 ` Peter Zijlstra
2024-06-20 16:17 ` Liang, Kan
2024-06-18 15:10 ` [RESEND PATCH 08/12] perf/x86: Extend event update interface kan.liang
2024-06-20 8:38 ` Peter Zijlstra
2024-06-20 16:18 ` Liang, Kan
2024-06-18 15:10 ` [RESEND PATCH 09/12] perf: Extend perf_output_read kan.liang
2024-06-20 9:00 ` Peter Zijlstra
2024-06-20 10:01 ` Peter Zijlstra
2024-06-18 15:10 ` [RESEND PATCH 10/12] perf/x86/intel: Move PEBS event update after the sample output kan.liang
2024-06-18 15:10 ` [RESEND PATCH 11/12] perf/x86/intel: Support PEBS counters snapshotting kan.liang
2024-06-18 15:10 ` [RESEND PATCH 12/12] perf/x86/intel: Support RDPMC metrics clear mode kan.liang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240618151044.1318612-5-kan.liang@linux.intel.com \
--to=kan.liang@linux.intel.com \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=ak@linux.intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=eranian@google.com \
--cc=irogers@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.