* [PATCH v5 0/2] perf/x86/amd: Fix for LBR Freeze
@ 2024-03-25 7:31 Sandipan Das
2024-03-25 7:31 ` [PATCH v5 1/2] x86/cpufeatures: Add new word for scattered features Sandipan Das
2024-03-25 7:31 ` [PATCH v5 2/2] perf/x86/amd/lbr: Use freeze based on availability Sandipan Das
0 siblings, 2 replies; 3+ messages in thread
From: Sandipan Das @ 2024-03-25 7:31 UTC (permalink / raw)
To: linux-perf-users, linux-kernel
Cc: x86, peterz, mingo, acme, namhyung, mark.rutland,
alexander.shishkin, jolsa, adrian.hunter, tglx, bp, seanjc,
pbonzini, eranian, irogers, ravi.bangoria, ananth.narayan,
sandipan.das
Contains a fix for usage of LBR Freeze.
Previous versions can be found at:
v4: https://lore.kernel.org/all/cover.1710836172.git.sandipan.das@amd.com/
v3: https://lore.kernel.org/all/cover.1706526029.git.sandipan.das@amd.com/
v2: https://lore.kernel.org/all/cover.1704103399.git.sandipan.das@amd.com/
v1: https://lore.kernel.org/all/cover.1702833179.git.sandipan.das@amd.com/
Changes in v5:
- Restore previously moved scattered bits and instead create a new
Linux-defined feature word for new scattered bits as suggested by
Boris and Ingo.
Changes in v4:
- Move scattered feature bits from CPUID 0x80000022[EAX] to a dedicated
word as suggested by Ingo.
- Remove patches that have been merged (patch 2 and 3 from v3).
Changes in v3:
- As suggested by Boris, update the commit message of the first patch
with the reason behind making the LBR and PMC Freeze feature bit
visible in /proc/cpuinfo.
Changes in v2:
- Make the LBR and PMC Freeze feature bit visible in /proc/cpuinfo. As
suggested by Stephane, this will be useful to determine if it is
feasible to perform kernel FDO on a system.
Sandipan Das (2):
x86/cpufeatures: Add new word for scattered features
perf/x86/amd/lbr: Use freeze based on availability
arch/x86/events/amd/core.c | 4 ++--
arch/x86/events/amd/lbr.c | 16 ++++++++++------
arch/x86/include/asm/cpufeature.h | 6 ++++--
arch/x86/include/asm/cpufeatures.h | 10 +++++++++-
arch/x86/include/asm/disabled-features.h | 3 ++-
arch/x86/include/asm/required-features.h | 3 ++-
arch/x86/kernel/cpu/scattered.c | 1 +
7 files changed, 30 insertions(+), 13 deletions(-)
--
2.34.1
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH v5 1/2] x86/cpufeatures: Add new word for scattered features
2024-03-25 7:31 [PATCH v5 0/2] perf/x86/amd: Fix for LBR Freeze Sandipan Das
@ 2024-03-25 7:31 ` Sandipan Das
2024-03-25 7:31 ` [PATCH v5 2/2] perf/x86/amd/lbr: Use freeze based on availability Sandipan Das
1 sibling, 0 replies; 3+ messages in thread
From: Sandipan Das @ 2024-03-25 7:31 UTC (permalink / raw)
To: linux-perf-users, linux-kernel
Cc: x86, peterz, mingo, acme, namhyung, mark.rutland,
alexander.shishkin, jolsa, adrian.hunter, tglx, bp, seanjc,
pbonzini, eranian, irogers, ravi.bangoria, ananth.narayan,
sandipan.das
Add a new word for scattered features because all free bits among the
existing Linux-defined auxiliary flags have been exhausted.
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
---
arch/x86/include/asm/cpufeature.h | 6 ++++--
arch/x86/include/asm/cpufeatures.h | 2 +-
arch/x86/include/asm/disabled-features.h | 3 ++-
arch/x86/include/asm/required-features.h | 3 ++-
4 files changed, 9 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a1273698fc43..42157ddcc09d 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -91,8 +91,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -116,8 +117,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f0337f7bcf16..4d850a780f7e 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 21 /* N 32-bit words worth of info */
+#define NCAPINTS 22 /* N 32-bit words worth of info */
#define NBUGINTS 2 /* N 32-bit bug flags */
/*
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index da4054fbf533..c492bdc97b05 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -155,6 +155,7 @@
#define DISABLED_MASK18 (DISABLE_IBT)
#define DISABLED_MASK19 (DISABLE_SEV_SNP)
#define DISABLED_MASK20 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define DISABLED_MASK21 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 7ba1726b71c7..e9187ddd3d1f 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -99,6 +99,7 @@
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
#define REQUIRED_MASK20 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define REQUIRED_MASK21 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [PATCH v5 2/2] perf/x86/amd/lbr: Use freeze based on availability
2024-03-25 7:31 [PATCH v5 0/2] perf/x86/amd: Fix for LBR Freeze Sandipan Das
2024-03-25 7:31 ` [PATCH v5 1/2] x86/cpufeatures: Add new word for scattered features Sandipan Das
@ 2024-03-25 7:31 ` Sandipan Das
1 sibling, 0 replies; 3+ messages in thread
From: Sandipan Das @ 2024-03-25 7:31 UTC (permalink / raw)
To: linux-perf-users, linux-kernel
Cc: x86, peterz, mingo, acme, namhyung, mark.rutland,
alexander.shishkin, jolsa, adrian.hunter, tglx, bp, seanjc,
pbonzini, eranian, irogers, ravi.bangoria, ananth.narayan,
sandipan.das
Currently, it is assumed that LBR Freeze is supported on all processors
when X86_FEATURE_AMD_LBR_V2 is available i.e. CPUID leaf 0x80000022[EAX]
bit 1 is set. This is incorrect as the availability of the feature is
additionally dependent on CPUID leaf 0x80000022[EAX] bit 2 being set.
This may not be the case for all Zen 4 processors. Define a new feature
bit for LBR and PMC freeze and set the freeze enable bit (FLBRI) in
DebugCtl (MSR 0x1d9) conditionally.
It should still be possible to use LBR without freeze for profile-guided
optimization of user programs by using an user-only branch filter during
profiling. When the user-only filter is enabled, branches are no longer
recorded after the transition to CPL 0 upon PMI arrival. When branch
entries are read in the PMI handler, the branch stack does not change.
E.g.
$ perf record -j any,u -e ex_ret_brn_tkn ./workload
Since the feature bit is visible under flags in /proc/cpuinfo, it can be
used to determine the feasibility of use-cases which require LBR Freeze
to be supported by the hardware such as profile-guided optimization of
kernels.
Fixes: ca5b7c0d9621 ("perf/x86/amd/lbr: Add LbrExtV2 branch record support")
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
---
arch/x86/events/amd/core.c | 4 ++--
arch/x86/events/amd/lbr.c | 16 ++++++++++------
arch/x86/include/asm/cpufeatures.h | 8 ++++++++
arch/x86/kernel/cpu/scattered.c | 1 +
4 files changed, 21 insertions(+), 8 deletions(-)
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index aec16e581f5b..5692e827afef 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -904,8 +904,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
if (!status)
goto done;
- /* Read branch records before unfreezing */
- if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+ /* Read branch records */
+ if (x86_pmu.lbr_nr) {
amd_pmu_lbr_read();
status &= ~GLOBAL_STATUS_LBRS_FROZEN;
}
diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c
index 4a1e600314d5..5149830c7c4f 100644
--- a/arch/x86/events/amd/lbr.c
+++ b/arch/x86/events/amd/lbr.c
@@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void)
wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
}
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
- rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
}
@@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void)
return;
rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
- rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
- wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+ if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+ }
}
__init int amd_pmu_lbr_init(void)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 4d850a780f7e..a38f8f9ba657 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -459,6 +459,14 @@
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0dad49a09b7a..a515328d9d7d 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
{ X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
{ X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
+ { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
--
2.34.1
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-03-25 7:32 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-25 7:31 [PATCH v5 0/2] perf/x86/amd: Fix for LBR Freeze Sandipan Das
2024-03-25 7:31 ` [PATCH v5 1/2] x86/cpufeatures: Add new word for scattered features Sandipan Das
2024-03-25 7:31 ` [PATCH v5 2/2] perf/x86/amd/lbr: Use freeze based on availability Sandipan Das
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).