* [PATCH 1/3] Provide VM capability to disable PMU virtualization for individual VMs
@ 2022-01-19 18:28 David Dunn
2022-01-19 18:28 ` [PATCH 2/3] Verify that the PMU event filter works as expected David Dunn
` (3 more replies)
0 siblings, 4 replies; 6+ messages in thread
From: David Dunn @ 2022-01-19 18:28 UTC
To: kvm, pbonzini, like.xu.linux, jmattson, cloudliang; +Cc: daviddunn
When PMU virtualization is enabled via the module parameter, usermode
can disable PMU virtualization on individual VMs using this new
capability.
This provides a uniform way to disable PMU virtualization on x86. Since
AMD doesn't have a CPUID bit for PMU support, disabling PMU
virtualization requires some other state to indicate whether the PMU
related MSRs are ignored.
Since KVM_GET_SUPPORTED_CPUID reports the maximal CPUID information
based on module parameters, usermode will need to adjust CPUID when
disabling PMU virtualization on individual VMs. On Intel CPUs, the
change to PMU enablement will not alter existing vCPUs until SET_CPUID2 is
invoked.
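For illustration, a minimal userspace sequence might look like the
following sketch (not part of this patch; vm_fd is assumed to come from
the usual KVM_CREATE_VM flow):

  struct kvm_enable_cap cap = {
          .cap = KVM_CAP_ENABLE_PMU,
          .args[0] = 0,   /* 0 = disable, 1 = enable; other bits -EINVAL */
  };

  /* VM-scoped ioctl; fails if the module parameter disabled the PMU. */
  if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
          perror("KVM_ENABLE_CAP");

Userspace would then re-invoke KVM_SET_CPUID2 on each vCPU so the change
takes effect.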
Signed-off-by: David Dunn <daviddunn@google.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/svm/pmu.c | 2 +-
arch/x86/kvm/vmx/pmu_intel.c | 2 +-
arch/x86/kvm/x86.c | 11 +++++++++++
include/uapi/linux/kvm.h | 1 +
tools/include/uapi/linux/kvm.h | 1 +
6 files changed, 16 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 682ad02a4e58..5cdcd4a7671b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1232,6 +1232,7 @@ struct kvm_arch {
hpa_t hv_root_tdp;
spinlock_t hv_root_tdp_lock;
#endif
+ bool enable_pmu;
};
struct kvm_vm_stat {
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 5aa45f13b16d..605bcfb55625 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -101,7 +101,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
{
struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
- if (!enable_pmu)
+ if (!enable_pmu || !vcpu->kvm->arch.enable_pmu)
return NULL;
switch (msr) {
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 466d18fc0c5d..4c3885765027 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -487,7 +487,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->reserved_bits = 0xffffffff00200000ull;
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
- if (!entry || !enable_pmu)
+ if (!entry || !vcpu->kvm->arch.enable_pmu || !enable_pmu)
return;
eax.full = entry->eax;
edx.full = entry->edx;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 55518b7d3b96..9b640c5bb4f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4326,6 +4326,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
if (r < sizeof(struct kvm_xsave))
r = sizeof(struct kvm_xsave);
break;
+ case KVM_CAP_ENABLE_PMU:
+ r = enable_pmu;
+ break;
}
default:
break;
@@ -5937,6 +5940,13 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
kvm->arch.exit_on_emulation_error = cap->args[0];
r = 0;
break;
+ case KVM_CAP_ENABLE_PMU:
+ r = -EINVAL;
+ if (!enable_pmu || cap->args[0] & ~1)
+ break;
+ kvm->arch.enable_pmu = cap->args[0];
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
@@ -11562,6 +11572,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
kvm->arch.guest_can_read_msr_platform_info = true;
+ kvm->arch.enable_pmu = true;
#if IS_ENABLED(CONFIG_HYPERV)
spin_lock_init(&kvm->arch.hv_root_tdp_lock);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 9563d294f181..37cbcdffe773 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
#define KVM_CAP_VM_GPA_BITS 207
#define KVM_CAP_XSAVE2 208
+#define KVM_CAP_ENABLE_PMU 209
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index f066637ee206..e71712c71ab1 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1132,6 +1132,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_MTE 205
#define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
#define KVM_CAP_XSAVE2 207
+#define KVM_CAP_ENABLE_PMU 209
#ifdef KVM_CAP_IRQ_ROUTING
--
2.34.1.703.g22d0c6ccf7-goog
* [PATCH 2/3] Verify that the PMU event filter works as expected.
2022-01-19 18:28 [PATCH 1/3] Provide VM capability to disable PMU virtualization for individual VMs David Dunn
@ 2022-01-19 18:28 ` David Dunn
2022-01-19 18:28 ` [PATCH 3/3] Verify KVM_CAP_ENABLE_PMU in kvm pmu_event_filter_test selftest David Dunn
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: David Dunn @ 2022-01-19 18:28 UTC
To: kvm, pbonzini, like.xu.linux, jmattson, cloudliang; +Cc: daviddunn
Note that the virtual PMU doesn't work as expected on AMD Zen CPUs (an
intercepted rdmsr is counted as a retired branch instruction), but the
PMU event filter does work.
This is a local application of a change authored by Jim Mattson and sent
to the kvm mailing list on Jan 14, 2022.
Signed-off-by: Jim Mattson <jmattson@google.com>
Signed-off-by: David Dunn <daviddunn@google.com>
---
.../kvm/x86_64/pmu_event_filter_test.c | 194 +++++++++++++++---
1 file changed, 163 insertions(+), 31 deletions(-)
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 8ac99d4cbc73..aa104946e6e0 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -16,10 +16,38 @@
#include "processor.h"
/*
- * In lieue of copying perf_event.h into tools...
+ * In lieu of copying perf_event.h into tools...
*/
-#define ARCH_PERFMON_EVENTSEL_ENABLE BIT(22)
-#define ARCH_PERFMON_EVENTSEL_OS BIT(17)
+#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17)
+#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22)
+
+union cpuid10_eax {
+ struct {
+ unsigned int version_id:8;
+ unsigned int num_counters:8;
+ unsigned int bit_width:8;
+ unsigned int mask_length:8;
+ } split;
+ unsigned int full;
+};
+
+union cpuid10_ebx {
+ struct {
+ unsigned int no_unhalted_core_cycles:1;
+ unsigned int no_instructions_retired:1;
+ unsigned int no_unhalted_reference_cycles:1;
+ unsigned int no_llc_reference:1;
+ unsigned int no_llc_misses:1;
+ unsigned int no_branch_instruction_retired:1;
+ unsigned int no_branch_misses_retired:1;
+ } split;
+ unsigned int full;
+};
+
+/* End of stuff taken from perf_event.h. */
+
+/* Oddly, this isn't in perf_event.h. */
+#define ARCH_PERFMON_BRANCHES_RETIRED 5
#define VCPU_ID 0
#define NUM_BRANCHES 42
@@ -45,14 +73,15 @@
* Preliminary Processor Programming Reference (PPR) for AMD Family
* 17h Model 31h, Revision B0 Processors, and Preliminary Processor
* Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
- * B1 Processors Volume 1 of 2
+ * B1 Processors Volume 1 of 2.
*/
#define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0)
/*
* This event list comprises Intel's eight architectural events plus
- * AMD's "branch instructions retired" for Zen[123].
+ * AMD's "retired branch instructions" for Zen[123] (and possibly
+ * other AMD CPUs).
*/
static const uint64_t event_list[] = {
EVENT(0x3c, 0),
@@ -66,11 +95,45 @@ static const uint64_t event_list[] = {
AMD_ZEN_BR_RETIRED,
};
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(0);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(0) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+ uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(0);
+
+ v ^= bits_to_flip;
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(0);
+}
+
static void intel_guest_code(void)
{
- uint64_t br0, br1;
+ check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+ check_msr(MSR_P6_EVNTSEL0, 0xffff);
+ check_msr(MSR_IA32_PMC0, 0xffff);
+ GUEST_SYNC(1);
for (;;) {
+ uint64_t br0, br1;
+
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED);
@@ -83,15 +146,19 @@ static void intel_guest_code(void)
}
/*
- * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit
- * 23], this code uses the always-available, legacy K7 PMU MSRs, which
- * alias to the first four of the six extended core PMU MSRs.
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
*/
static void amd_guest_code(void)
{
- uint64_t br0, br1;
+ check_msr(MSR_K7_EVNTSEL0, 0xffff);
+ check_msr(MSR_K7_PERFCTR0, 0xffff);
+ GUEST_SYNC(1);
for (;;) {
+ uint64_t br0, br1;
+
wrmsr(MSR_K7_EVNTSEL0, 0);
wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED);
@@ -102,7 +169,11 @@ static void amd_guest_code(void)
}
}
-static uint64_t test_branches_retired(struct kvm_vm *vm)
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vm_to_sync(struct kvm_vm *vm)
{
struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
@@ -118,6 +189,25 @@ static uint64_t test_branches_retired(struct kvm_vm *vm)
return uc.args[1];
}
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vm *vm)
+{
+ bool success;
+
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+ success = run_vm_to_sync(vm);
+ vm_install_exception_handler(vm, GP_VECTOR, NULL);
+
+ return success;
+}
+
static struct kvm_pmu_event_filter *make_pmu_event_filter(uint32_t nevents)
{
struct kvm_pmu_event_filter *f;
@@ -143,6 +233,10 @@ static struct kvm_pmu_event_filter *event_filter(uint32_t action)
return f;
}
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
uint64_t event)
{
@@ -160,9 +254,9 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f,
return f;
}
-static void test_no_filter(struct kvm_vm *vm)
+static void test_without_filter(struct kvm_vm *vm)
{
- uint64_t count = test_branches_retired(vm);
+ uint64_t count = run_vm_to_sync(vm);
if (count != NUM_BRANCHES)
pr_info("%s: Branch instructions retired = %lu (expected %u)\n",
@@ -174,7 +268,7 @@ static uint64_t test_with_filter(struct kvm_vm *vm,
struct kvm_pmu_event_filter *f)
{
vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f);
- return test_branches_retired(vm);
+ return run_vm_to_sync(vm);
}
static void test_member_deny_list(struct kvm_vm *vm)
@@ -231,40 +325,70 @@ static void test_not_member_allow_list(struct kvm_vm *vm)
TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}
+/*
+ * Check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, an EBX bit vector of length greater
+ * than 5, and EBX[5] clear.
+ */
+static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry)
+{
+ union cpuid10_eax eax = { .full = entry->eax };
+ union cpuid10_ebx ebx = { .full = entry->ebx };
+
+ return eax.split.version_id && eax.split.num_counters > 0 &&
+ eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED &&
+ !ebx.split.no_branch_instruction_retired;
+}
+
/*
* Note that CPUID leaf 0xa is Intel-specific. This leaf should be
* clear on AMD hardware.
*/
-static bool vcpu_supports_intel_br_retired(void)
+static bool use_intel_pmu(void)
{
struct kvm_cpuid_entry2 *entry;
struct kvm_cpuid2 *cpuid;
cpuid = kvm_get_supported_cpuid();
entry = kvm_get_supported_cpuid_index(0xa, 0);
- return entry &&
- (entry->eax & 0xff) &&
- (entry->eax >> 24) > 5 &&
- !(entry->ebx & BIT(5));
+ return is_intel_cpu() && entry && check_intel_pmu_leaf(entry);
+}
+
+static bool is_zen1(uint32_t eax)
+{
+ return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
+}
+
+static bool is_zen2(uint32_t eax)
+{
+ return x86_family(eax) == 0x17 &&
+ x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
+}
+
+static bool is_zen3(uint32_t eax)
+{
+ return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
}
/*
* Determining AMD support for a PMU event requires consulting the AMD
- * PPR for the CPU or reference material derived therefrom.
+ * PPR for the CPU or reference material derived therefrom. The AMD
+ * test code herein has been verified to work on Zen1, Zen2, and Zen3.
+ *
+ * Feel free to add more AMD CPUs that are documented to support event
+ * select 0xc2 umask 0 as "retired branch instructions."
*/
-static bool vcpu_supports_amd_zen_br_retired(void)
+static bool use_amd_pmu(void)
{
struct kvm_cpuid_entry2 *entry;
struct kvm_cpuid2 *cpuid;
cpuid = kvm_get_supported_cpuid();
entry = kvm_get_supported_cpuid_index(1, 0);
- return entry &&
- ((x86_family(entry->eax) == 0x17 &&
- (x86_model(entry->eax) == 1 ||
- x86_model(entry->eax) == 0x31)) ||
- (x86_family(entry->eax) == 0x19 &&
- x86_model(entry->eax) == 1));
+ return is_amd_cpu() && entry &&
+ (is_zen1(entry->eax) ||
+ is_zen2(entry->eax) ||
+ is_zen3(entry->eax));
}
int main(int argc, char *argv[])
@@ -282,19 +406,27 @@ int main(int argc, char *argv[])
exit(KSFT_SKIP);
}
- if (vcpu_supports_intel_br_retired())
+ if (use_intel_pmu())
guest_code = intel_guest_code;
- else if (vcpu_supports_amd_zen_br_retired())
+ else if (use_amd_pmu())
guest_code = amd_guest_code;
if (!guest_code) {
- print_skip("Branch instructions retired not supported");
+ print_skip("Don't know how to test this guest PMU");
exit(KSFT_SKIP);
}
vm = vm_create_default(VCPU_ID, 0, guest_code);
- test_no_filter(vm);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ if (!sanity_check_pmu(vm)) {
+ print_skip("Guest PMU is not functional");
+ exit(KSFT_SKIP);
+ }
+
+ test_without_filter(vm);
test_member_deny_list(vm);
test_member_allow_list(vm);
test_not_member_deny_list(vm);
--
2.34.1.703.g22d0c6ccf7-goog
* [PATCH 3/3] Verify KVM_CAP_ENABLE_PMU in kvm pmu_event_filter_test selftest.
2022-01-19 18:28 [PATCH 1/3] Provide VM capability to disable PMU virtualization for individual VMs David Dunn
2022-01-19 18:28 ` [PATCH 2/3] Verify that the PMU event filter works as expected David Dunn
@ 2022-01-19 18:28 ` David Dunn
2022-01-20 1:15 ` [PATCH 1/3] Provide VM capability to disable PMU virtualization for individual VMs Sean Christopherson
2022-01-20 3:02 ` Like Xu
3 siblings, 0 replies; 6+ messages in thread
From: David Dunn @ 2022-01-19 18:28 UTC
To: kvm, pbonzini, like.xu.linux, jmattson, cloudliang; +Cc: daviddunn
After disabling the PMU using KVM_CAP_ENABLE_PMU, the PMU should no longer
be visible to the guest. On Intel, accesses to the PMU MSRs then raise
#GP; on AMD, the counters are simply no longer functional.
Signed-off-by: David Dunn <daviddunn@google.com>
---
.../kvm/x86_64/pmu_event_filter_test.c | 29 +++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index aa104946e6e0..0bd502d3055c 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -325,6 +325,34 @@ static void test_not_member_allow_list(struct kvm_vm *vm)
TEST_ASSERT(!count, "Disallowed PMU Event is counting");
}
+/*
+ * Verify that disabling the PMU via KVM_CAP_ENABLE_PMU hides it from the guest.
+ *
+ * After every change to CAP_ENABLE_PMU, SET_CPUID2 is required to refresh
+ * KVM PMU state on an existing vCPU.
+ */
+static void test_cap_enable_pmu(struct kvm_vm *vm)
+{
+ int r;
+ struct kvm_cpuid2 *cpuid2;
+ struct kvm_enable_cap cap = { .cap = KVM_CAP_ENABLE_PMU };
+ bool sane;
+
+ r = kvm_check_cap(KVM_CAP_ENABLE_PMU);
+ if (!r)
+ return;
+
+ cpuid2 = vcpu_get_cpuid(vm, VCPU_ID);
+
+ cap.args[0] = 0;
+ r = vm_enable_cap(vm, &cap);
+ vcpu_set_cpuid(vm, VCPU_ID, cpuid2);
+
+ sane = sanity_check_pmu(vm);
+
+ TEST_ASSERT(!sane, "Guest should not see PMU when disabled.");
+}
+
/*
* Check for a non-zero PMU version, at least one general-purpose
* counter per logical processor, an EBX bit vector of length greater
@@ -431,6 +459,7 @@ int main(int argc, char *argv[])
test_member_allow_list(vm);
test_not_member_deny_list(vm);
test_not_member_allow_list(vm);
+ test_cap_enable_pmu(vm);
kvm_vm_free(vm);
--
2.34.1.703.g22d0c6ccf7-goog
* Re: [PATCH 1/3] Provide VM capability to disable PMU virtualization for individual VMs
2022-01-19 18:28 [PATCH 1/3] Provide VM capability to disable PMU virtualization for individual VMs David Dunn
2022-01-19 18:28 ` [PATCH 2/3] Verify that the PMU event filter works as expected David Dunn
2022-01-19 18:28 ` [PATCH 3/3] Verify KVM_CAP_ENABLE_PMU in kvm pmu_event_filter_test selftest David Dunn
@ 2022-01-20 1:15 ` Sean Christopherson
2022-01-20 3:00 ` David Dunn
2022-01-20 3:02 ` Like Xu
3 siblings, 1 reply; 6+ messages in thread
From: Sean Christopherson @ 2022-01-20 1:15 UTC
To: David Dunn; +Cc: kvm, pbonzini, like.xu.linux, jmattson, cloudliang
On Wed, Jan 19, 2022, David Dunn wrote:
> When PMU virtualization is enabled via the module parameter, usermode
> can disable PMU virtualization on individual VMs using this new
> capability.
>
> This provides a uniform way to disable PMU virtualization on x86. Since
> AMD doesn't have a CPUID bit for PMU support, disabling PMU
> virtualization requires some other state to indicate whether the PMU
> related MSRs are ignored.
>
> Since KVM_GET_SUPPORTED_CPUID reports the maximal CPUID information
> based on module parameters, usermode will need to adjust CPUID when
> disabling PMU virtualization on individual VMs. On Intel CPUs, the
> change to PMU enablement will not alter existing vCPUs until SET_CPUID2 is
> invoked.
>
> Signed-off-by: David Dunn <daviddunn@google.com>
> ---
I'm not necessarily opposed to this capability, but can't userspace get the same
result by using MSR filtering to inject #GP on the PMU MSRs?
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 55518b7d3b96..9b640c5bb4f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4326,6 +4326,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> if (r < sizeof(struct kvm_xsave))
> r = sizeof(struct kvm_xsave);
> break;
> + case KVM_CAP_ENABLE_PMU:
> + r = enable_pmu;
> + break;
> }
> default:
> break;
> @@ -5937,6 +5940,13 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
> kvm->arch.exit_on_emulation_error = cap->args[0];
> r = 0;
> break;
> + case KVM_CAP_ENABLE_PMU:
> + r = -EINVAL;
> + if (!enable_pmu || cap->args[0] & ~1)
Probably worth adding a #define in uapi/.../kvm.h for bit 0.
> + break;
> + kvm->arch.enable_pmu = cap->args[0];
> + r = 0;
> + break;
> default:
> r = -EINVAL;
> break;
> @@ -11562,6 +11572,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
>
> kvm->arch.guest_can_read_msr_platform_info = true;
> + kvm->arch.enable_pmu = true;
Rather than default to "true", just capture the global "enable_pmu" and then all
the sites that check "enable_pmu" in VM context can check _only_ kvm->arch.enable_pmu.
enable_pmu is readonly, so there's no danger of it being toggled after the VM is
created.
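Roughly, a sketch of that suggestion (not the posted patch):

  /* In kvm_arch_init_vm(): snapshot the read-only module param once. */
  kvm->arch.enable_pmu = enable_pmu;

  /* Call sites in VM context then check only the per-VM flag, e.g.: */
  if (!vcpu->kvm->arch.enable_pmu)
          return NULL;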
> #if IS_ENABLED(CONFIG_HYPERV)
> spin_lock_init(&kvm->arch.hv_root_tdp_lock);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 9563d294f181..37cbcdffe773 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
> #define KVM_CAP_VM_GPA_BITS 207
> #define KVM_CAP_XSAVE2 208
> +#define KVM_CAP_ENABLE_PMU 209
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
> index f066637ee206..e71712c71ab1 100644
> --- a/tools/include/uapi/linux/kvm.h
> +++ b/tools/include/uapi/linux/kvm.h
> @@ -1132,6 +1132,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_ARM_MTE 205
> #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
> #define KVM_CAP_XSAVE2 207
> +#define KVM_CAP_ENABLE_PMU 209
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> --
> 2.34.1.703.g22d0c6ccf7-goog
>
* Re: [PATCH 1/3] Provide VM capability to disable PMU virtualization for individual VMs
2022-01-20 1:15 ` [PATCH 1/3] Provide VM capability to disable PMU virtualization for individual VMs Sean Christopherson
@ 2022-01-20 3:00 ` David Dunn
0 siblings, 0 replies; 6+ messages in thread
From: David Dunn @ 2022-01-20 3:00 UTC
To: Sean Christopherson; +Cc: kvm, pbonzini, like.xu.linux, Jim Mattson, cloudliang
Thanks Sean.
On Wed, Jan 19, 2022 at 5:15 PM Sean Christopherson <seanjc@google.com> wrote:
> I'm not necessarily opposed to this capability, but can't userspace get the same
> result by using MSR filtering to inject #GP on the PMU MSRs?
Yes. It is possible for each userspace to inject #GP on Intel and ignore
on AMD. But I think it is less error prone to handle it once in KVM, in
the same way we handle the module parameter. No extra complexity in KVM,
but it reduces the complexity in clients.
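For comparison, a rough sketch of that userspace-only approach, assuming
KVM_X86_SET_MSR_FILTER support; the MSR range chosen is illustrative, and
by default accesses denied by the filter result in an injected #GP
(unless userspace opts into MSR exits via KVM_CAP_X86_USER_SPACE_MSR):

  uint8_t deny[1] = { 0 };        /* all bits clear: accesses denied */
  struct kvm_msr_filter filter = {
          .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
          .ranges[0] = {
                  .flags  = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
                  .base   = 0xc1,   /* IA32_PMC0..IA32_PMC7 (illustrative) */
                  .nmsrs  = 8,
                  .bitmap = deny,
          },
  };
  if (ioctl(vm_fd, KVM_X86_SET_MSR_FILTER, &filter) < 0)
          perror("KVM_X86_SET_MSR_FILTER");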
> Probably worth adding a #define in uapi/.../kvm.h for bit 0.
> Rather than default to "true", just capture the global "enable_pmu" and then all
> the sites that check "enable_pmu" in VM context can check _only_ kvm->arch.enable_pmu.
> enable_pmu is readonly, so there's no danger of it being toggled after the VM is
> created.
Thanks for the feedback. I'll incorporate both of these in v2.
Dave Dunn
* Re: [PATCH 1/3] Provide VM capability to disable PMU virtualization for individual VMs
2022-01-19 18:28 [PATCH 1/3] Provide VM capability to disable PMU virtualization for individual VMs David Dunn
` (2 preceding siblings ...)
2022-01-20 1:15 ` [PATCH 1/3] Provide VM capability to disable PMU virtualization for individual VMs Sean Christopherson
@ 2022-01-20 3:02 ` Like Xu
3 siblings, 0 replies; 6+ messages in thread
From: Like Xu @ 2022-01-20 3:02 UTC
To: David Dunn
Cc: kvm list,
Paolo Bonzini - Distinguished Engineer (kernel-recipes.org),
cloudliang(梁金荣), Jim Mattson
Hi David,
Thanks for coming to address this.
Please modify the patch(es) subject to follow the convention.
On 20/1/2022 2:28 am, David Dunn wrote:
> When PMU virtualization is enabled via the module parameter, usermode
> can disable PMU virtualization on individual VMs using this new
> capability.
Will user space fail or be notified when enable_pmu says no?
>
> This provides a uniform way to disable PMU virtualization on x86. Since
> AMD doesn't have a CPUID bit for PMU support, disabling PMU
Not entirely absent; there is, for example, PERFCTR_CORE.
> virtualization requires some other state to indicate whether the PMU
> related MSRs are ignored.
Not just ignored, but made to disappear altogether.
>
> Since KVM_GET_SUPPORTED_CPUID reports the maximal CPUID information
> based on module parameters, usermode will need to adjust CPUID when
> disabling PMU virtualization on individual VMs. On Intel CPUs, the
> change to PMU enablement will not alter existing vCPUs until SET_CPUID2 is
> invoked.
Please clarify. Do we have a requirement for the order in which the
SET_CPUID2 and ioctl_enable_cap interfaces are called?
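(For reference, the selftest in patch 3 uses one particular order: toggle
the cap first, then refresh via SET_CPUID2:

  cap.args[0] = 0;
  vm_enable_cap(vm, &cap);              /* KVM_CAP_ENABLE_PMU = 0 */
  vcpu_set_cpuid(vm, VCPU_ID, cpuid2);  /* refresh vPMU state */

whether that order is required is the open question here.)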
>
> Signed-off-by: David Dunn <daviddunn@google.com>
> ---
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/kvm/svm/pmu.c | 2 +-
> arch/x86/kvm/vmx/pmu_intel.c | 2 +-
> arch/x86/kvm/x86.c | 11 +++++++++++
> include/uapi/linux/kvm.h | 1 +
> tools/include/uapi/linux/kvm.h | 1 +
> 6 files changed, 16 insertions(+), 2 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 682ad02a4e58..5cdcd4a7671b 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1232,6 +1232,7 @@ struct kvm_arch {
> hpa_t hv_root_tdp;
> spinlock_t hv_root_tdp_lock;
> #endif
> + bool enable_pmu;
The name makes it difficult to tell the variable's scope at the access sites.
Try storing this state via "pmu->version == 0" instead.
> };
>
> struct kvm_vm_stat {
> diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
> index 5aa45f13b16d..605bcfb55625 100644
> --- a/arch/x86/kvm/svm/pmu.c
> +++ b/arch/x86/kvm/svm/pmu.c
> @@ -101,7 +101,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
> {
> struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
>
> - if (!enable_pmu)
> + if (!enable_pmu || !vcpu->kvm->arch.enable_pmu)
> return NULL;
>
> switch (msr) {
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index 466d18fc0c5d..4c3885765027 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -487,7 +487,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
> pmu->reserved_bits = 0xffffffff00200000ull;
>
> entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
> - if (!entry || !enable_pmu)
> + if (!entry || !vcpu->kvm->arch.enable_pmu || !enable_pmu)
> return;
> eax.full = entry->eax;
> edx.full = entry->edx;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 55518b7d3b96..9b640c5bb4f6 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4326,6 +4326,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
> if (r < sizeof(struct kvm_xsave))
> r = sizeof(struct kvm_xsave);
> break;
> + case KVM_CAP_ENABLE_PMU:
> + r = enable_pmu;
> + break;
> }
> default:
> break;
> @@ -5937,6 +5940,13 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
> kvm->arch.exit_on_emulation_error = cap->args[0];
> r = 0;
> break;
> + case KVM_CAP_ENABLE_PMU:
> + r = -EINVAL;
> + if (!enable_pmu || cap->args[0] & ~1)
> + break;
> + kvm->arch.enable_pmu = cap->args[0];
> + r = 0;
> + break;
> default:
> r = -EINVAL;
> break;
> @@ -11562,6 +11572,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
> raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
>
> kvm->arch.guest_can_read_msr_platform_info = true;
> + kvm->arch.enable_pmu = true;
>
> #if IS_ENABLED(CONFIG_HYPERV)
> spin_lock_init(&kvm->arch.hv_root_tdp_lock);
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 9563d294f181..37cbcdffe773 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -1133,6 +1133,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
> #define KVM_CAP_VM_GPA_BITS 207
> #define KVM_CAP_XSAVE2 208
> +#define KVM_CAP_ENABLE_PMU 209
Rename it to KVM_CAP_PMU_CAPABILITY and use the bit 0 for *DISABLE_PMU*.
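That is, something like the following uapi sketch (names as suggested
here, not what this patch defines):

  #define KVM_CAP_PMU_CAPABILITY  209
  #define KVM_PMU_CAP_DISABLE     (1 << 0)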
>
> #ifdef KVM_CAP_IRQ_ROUTING
>
> diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
> index f066637ee206..e71712c71ab1 100644
> --- a/tools/include/uapi/linux/kvm.h
> +++ b/tools/include/uapi/linux/kvm.h
> @@ -1132,6 +1132,7 @@ struct kvm_ppc_resize_hpt {
> #define KVM_CAP_ARM_MTE 205
> #define KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM 206
> #define KVM_CAP_XSAVE2 207
> +#define KVM_CAP_ENABLE_PMU 209
>
> #ifdef KVM_CAP_IRQ_ROUTING
>