From: Sean Christopherson <seanjc@google.com>
To: Sean Christopherson <seanjc@google.com>,
Paolo Bonzini <pbonzini@redhat.com>
Cc: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
Chao Gao <chao.gao@intel.com>, Borislav Petkov <bp@alien8.de>,
Xin Li <xin@zytor.com>, Dapeng Mi <dapeng1.mi@linux.intel.com>,
Francesco Lavra <francescolavra.fl@gmail.com>,
Manali Shukla <Manali.Shukla@amd.com>
Subject: [PATCH v2 18/32] KVM: VMX: Manually recalc all MSR intercepts on userspace MSR filter change
Date: Tue, 10 Jun 2025 15:57:23 -0700 [thread overview]
Message-ID: <20250610225737.156318-19-seanjc@google.com> (raw)
In-Reply-To: <20250610225737.156318-1-seanjc@google.com>
On a userspace MSR filter change, recalculate all MSR intercepts using the
filter-agnostic logic instead of maintaining a "shadow copy" of KVM's
desired intercepts. The shadow bitmaps add yet another point of failure,
are confusing (e.g. what does "handled specially" mean!?!?), an eyesore,
and a maintenance burden.
Given that KVM *must* be able to recalculate the correct intercepts at any
given time, and that MSR filter updates are not hot paths, there is zero
benefit to maintaining the shadow bitmaps.
Opportunistically switch from boot_cpu_has() to cpu_feature_enabled() as
appropriate.
Link: https://lore.kernel.org/all/aCdPbZiYmtni4Bjs@google.com
Link: https://lore.kernel.org/all/20241126180253.GAZ0YNTdXH1UGeqsu6@fat_crate.local
Cc: Borislav Petkov <bp@alien8.de>
Reviewed-by: Chao Gao <chao.gao@intel.com>
Reviewed-by: Xin Li (Intel) <xin@zytor.com>
Reviewed-by: Dapeng Mi <dapeng1.mi@linux.intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/vmx/vmx.c | 183 +++++++++++------------------------------
arch/x86/kvm/vmx/vmx.h | 7 --
2 files changed, 46 insertions(+), 144 deletions(-)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8f7fe04a1998..ce7a1c07e402 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -166,31 +166,6 @@ module_param(allow_smaller_maxphyaddr, bool, S_IRUGO);
RTIT_STATUS_ERROR | RTIT_STATUS_STOPPED | \
RTIT_STATUS_BYTECNT))
-/*
- * List of MSRs that can be directly passed to the guest.
- * In addition to these x2apic, PT and LBR MSRs are handled specially.
- */
-static u32 vmx_possible_passthrough_msrs[MAX_POSSIBLE_PASSTHROUGH_MSRS] = {
- MSR_IA32_SPEC_CTRL,
- MSR_IA32_PRED_CMD,
- MSR_IA32_FLUSH_CMD,
- MSR_IA32_TSC,
-#ifdef CONFIG_X86_64
- MSR_FS_BASE,
- MSR_GS_BASE,
- MSR_KERNEL_GS_BASE,
- MSR_IA32_XFD,
- MSR_IA32_XFD_ERR,
-#endif
- MSR_IA32_SYSENTER_CS,
- MSR_IA32_SYSENTER_ESP,
- MSR_IA32_SYSENTER_EIP,
- MSR_CORE_C1_RES,
- MSR_CORE_C3_RESIDENCY,
- MSR_CORE_C6_RESIDENCY,
- MSR_CORE_C7_RESIDENCY,
-};
-
/*
* These 2 parameters are used to config the controls for Pause-Loop Exiting:
* ple_gap: upper bound on the amount of time between two successive
@@ -672,40 +647,6 @@ static inline bool cpu_need_virtualize_apic_accesses(struct kvm_vcpu *vcpu)
return flexpriority_enabled && lapic_in_kernel(vcpu);
}
-static int vmx_get_passthrough_msr_slot(u32 msr)
-{
- int i;
-
- switch (msr) {
- case 0x800 ... 0x8ff:
- /* x2APIC MSRs. These are handled in vmx_update_msr_bitmap_x2apic() */
- return -ENOENT;
- case MSR_IA32_RTIT_STATUS:
- case MSR_IA32_RTIT_OUTPUT_BASE:
- case MSR_IA32_RTIT_OUTPUT_MASK:
- case MSR_IA32_RTIT_CR3_MATCH:
- case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B:
- /* PT MSRs. These are handled in pt_update_intercept_for_msr() */
- case MSR_LBR_SELECT:
- case MSR_LBR_TOS:
- case MSR_LBR_INFO_0 ... MSR_LBR_INFO_0 + 31:
- case MSR_LBR_NHM_FROM ... MSR_LBR_NHM_FROM + 31:
- case MSR_LBR_NHM_TO ... MSR_LBR_NHM_TO + 31:
- case MSR_LBR_CORE_FROM ... MSR_LBR_CORE_FROM + 8:
- case MSR_LBR_CORE_TO ... MSR_LBR_CORE_TO + 8:
- /* LBR MSRs. These are handled in vmx_update_intercept_for_lbr_msrs() */
- return -ENOENT;
- }
-
- for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
- if (vmx_possible_passthrough_msrs[i] == msr)
- return i;
- }
-
- WARN(1, "Invalid MSR %x, please adapt vmx_possible_passthrough_msrs[]", msr);
- return -ENOENT;
-}
-
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
{
int i;
@@ -4015,25 +3956,12 @@ void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
- int idx;
if (!cpu_has_vmx_msr_bitmap())
return;
vmx_msr_bitmap_l01_changed(vmx);
- /*
- * Mark the desired intercept state in shadow bitmap, this is needed
- * for resync when the MSR filters change.
- */
- idx = vmx_get_passthrough_msr_slot(msr);
- if (idx >= 0) {
- if (type & MSR_TYPE_R)
- __clear_bit(idx, vmx->shadow_msr_intercept.read);
- if (type & MSR_TYPE_W)
- __clear_bit(idx, vmx->shadow_msr_intercept.write);
- }
-
if ((type & MSR_TYPE_R) &&
!kvm_msr_allowed(vcpu, msr, KVM_MSR_FILTER_READ)) {
vmx_set_msr_bitmap_read(msr_bitmap, msr);
@@ -4057,25 +3985,12 @@ void vmx_enable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;
- int idx;
if (!cpu_has_vmx_msr_bitmap())
return;
vmx_msr_bitmap_l01_changed(vmx);
- /*
- * Mark the desired intercept state in shadow bitmap, this is needed
- * for resync when the MSR filter changes.
- */
- idx = vmx_get_passthrough_msr_slot(msr);
- if (idx >= 0) {
- if (type & MSR_TYPE_R)
- __set_bit(idx, vmx->shadow_msr_intercept.read);
- if (type & MSR_TYPE_W)
- __set_bit(idx, vmx->shadow_msr_intercept.write);
- }
-
if (type & MSR_TYPE_R)
vmx_set_msr_bitmap_read(msr_bitmap, msr);
@@ -4159,35 +4074,58 @@ void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu)
}
}
-void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+static void vmx_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
{
- struct vcpu_vmx *vmx = to_vmx(vcpu);
- u32 i;
-
if (!cpu_has_vmx_msr_bitmap())
return;
- /*
- * Redo intercept permissions for MSRs that KVM is passing through to
- * the guest. Disabling interception will check the new MSR filter and
- * ensure that KVM enables interception if usersepace wants to filter
- * the MSR. MSRs that KVM is already intercepting don't need to be
- * refreshed since KVM is going to intercept them regardless of what
- * userspace wants.
- */
- for (i = 0; i < ARRAY_SIZE(vmx_possible_passthrough_msrs); i++) {
- u32 msr = vmx_possible_passthrough_msrs[i];
-
- if (!test_bit(i, vmx->shadow_msr_intercept.read))
- vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_R);
-
- if (!test_bit(i, vmx->shadow_msr_intercept.write))
- vmx_disable_intercept_for_msr(vcpu, msr, MSR_TYPE_W);
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
+#ifdef CONFIG_X86_64
+ vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+#endif
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
+ if (kvm_cstate_in_guest(vcpu->kvm)) {
+ vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
+ vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
}
/* PT MSRs can be passed through iff PT is exposed to the guest. */
if (vmx_pt_mode_is_host_guest())
pt_update_intercept_for_msr(vcpu);
+
+ if (vcpu->arch.xfd_no_write_intercept)
+ vmx_disable_intercept_for_msr(vcpu, MSR_IA32_XFD, MSR_TYPE_RW);
+
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_SPEC_CTRL, MSR_TYPE_RW,
+ !to_vmx(vcpu)->spec_ctrl);
+
+ if (kvm_cpu_cap_has(X86_FEATURE_XFD))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD));
+
+ if (cpu_feature_enabled(X86_FEATURE_IBPB))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
+ !guest_has_pred_cmd_msr(vcpu));
+
+ if (cpu_feature_enabled(X86_FEATURE_FLUSH_L1D))
+ vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
+ !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
+
+ /*
+ * x2APIC and LBR MSR intercepts are modified on-demand and cannot be
+ * filtered by userspace.
+ */
+}
+
+void vmx_msr_filter_changed(struct kvm_vcpu *vcpu)
+{
+ vmx_recalc_msr_intercepts(vcpu);
}
static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
@@ -7537,26 +7475,6 @@ int vmx_vcpu_create(struct kvm_vcpu *vcpu)
evmcs->hv_enlightenments_control.msr_bitmap = 1;
}
- /* The MSR bitmap starts with all ones */
- bitmap_fill(vmx->shadow_msr_intercept.read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
- bitmap_fill(vmx->shadow_msr_intercept.write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
-
- vmx_disable_intercept_for_msr(vcpu, MSR_IA32_TSC, MSR_TYPE_R);
-#ifdef CONFIG_X86_64
- vmx_disable_intercept_for_msr(vcpu, MSR_FS_BASE, MSR_TYPE_RW);
- vmx_disable_intercept_for_msr(vcpu, MSR_GS_BASE, MSR_TYPE_RW);
- vmx_disable_intercept_for_msr(vcpu, MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
-#endif
- vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
- vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
- vmx_disable_intercept_for_msr(vcpu, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
- if (kvm_cstate_in_guest(vcpu->kvm)) {
- vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C1_RES, MSR_TYPE_R);
- vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
- vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
- vmx_disable_intercept_for_msr(vcpu, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
- }
-
vmx->loaded_vmcs = &vmx->vmcs01;
if (cpu_need_virtualize_apic_accesses(vcpu)) {
@@ -7842,18 +7760,6 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
}
}
- if (kvm_cpu_cap_has(X86_FEATURE_XFD))
- vmx_set_intercept_for_msr(vcpu, MSR_IA32_XFD_ERR, MSR_TYPE_R,
- !guest_cpu_cap_has(vcpu, X86_FEATURE_XFD));
-
- if (boot_cpu_has(X86_FEATURE_IBPB))
- vmx_set_intercept_for_msr(vcpu, MSR_IA32_PRED_CMD, MSR_TYPE_W,
- !guest_has_pred_cmd_msr(vcpu));
-
- if (boot_cpu_has(X86_FEATURE_FLUSH_L1D))
- vmx_set_intercept_for_msr(vcpu, MSR_IA32_FLUSH_CMD, MSR_TYPE_W,
- !guest_cpu_cap_has(vcpu, X86_FEATURE_FLUSH_L1D));
-
set_cr4_guest_host_mask(vmx);
vmx_write_encls_bitmap(vcpu, NULL);
@@ -7869,6 +7775,9 @@ void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
vmx->msr_ia32_feature_control_valid_bits &=
~FEAT_CTL_SGX_LC_ENABLED;
+ /* Recalc MSR interception to account for feature changes. */
+ vmx_recalc_msr_intercepts(vcpu);
+
/* Refresh #PF interception to account for MAXPHYADDR changes. */
vmx_update_exception_bitmap(vcpu);
}
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 0afe97e3478f..a26fe3d9e1d2 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -294,13 +294,6 @@ struct vcpu_vmx {
struct pt_desc pt_desc;
struct lbr_desc lbr_desc;
- /* Save desired MSR intercept (read: pass-through) state */
-#define MAX_POSSIBLE_PASSTHROUGH_MSRS 16
- struct {
- DECLARE_BITMAP(read, MAX_POSSIBLE_PASSTHROUGH_MSRS);
- DECLARE_BITMAP(write, MAX_POSSIBLE_PASSTHROUGH_MSRS);
- } shadow_msr_intercept;
-
/* ve_info must be page aligned. */
struct vmx_ve_information *ve_info;
};
--
2.50.0.rc0.642.g800a2b2222-goog
next prev parent reply other threads:[~2025-06-10 22:58 UTC|newest]
Thread overview: 46+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-06-10 22:57 [PATCH v2 00/32] KVM: x86: Clean up MSR interception code Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 01/32] KVM: SVM: Disable interception of SPEC_CTRL iff the MSR exists for the guest Sean Christopherson
2025-06-11 4:38 ` Binbin Wu
2025-06-11 7:14 ` Binbin Wu
2025-06-10 22:57 ` [PATCH v2 02/32] KVM: SVM: Allocate IOPM pages after initial setup in svm_hardware_setup() Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 03/32] KVM: SVM: Don't BUG if setting up the MSR intercept bitmaps fails Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 04/32] KVM: SVM: Tag MSR bitmap initialization helpers with __init Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 05/32] KVM: SVM: Use ARRAY_SIZE() to iterate over direct_access_msrs Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 06/32] KVM: SVM: Kill the VM instead of the host if MSR interception is buggy Sean Christopherson
2025-06-11 2:16 ` Mi, Dapeng
2025-06-10 22:57 ` [PATCH v2 07/32] KVM: x86: Use non-atomic bit ops to manipulate "shadow" MSR intercepts Sean Christopherson
2025-06-11 6:38 ` Binbin Wu
2025-06-10 22:57 ` [PATCH v2 08/32] KVM: SVM: Massage name and param of helper that merges vmcb01 and vmcb12 MSRPMs Sean Christopherson
2025-06-11 2:22 ` Mi, Dapeng
2025-06-10 22:57 ` [PATCH v2 09/32] KVM: SVM: Clean up macros related to architectural MSRPM definitions Sean Christopherson
2025-06-11 6:09 ` Binbin Wu
2025-06-10 22:57 ` [PATCH v2 10/32] KVM: nSVM: Use dedicated array of MSRPM offsets to merge L0 and L1 bitmaps Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 11/32] KVM: nSVM: Omit SEV-ES specific passthrough MSRs from L0+L1 bitmap merge Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 12/32] KVM: nSVM: Don't initialize vmcb02 MSRPM with vmcb01's "always passthrough" Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 13/32] KVM: SVM: Add helpers for accessing MSR bitmap that don't rely on offsets Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 14/32] KVM: SVM: Implement and adopt VMX style MSR intercepts APIs Sean Christopherson
2025-06-11 7:31 ` Binbin Wu
2025-06-10 22:57 ` [PATCH v2 15/32] KVM: SVM: Pass through GHCB MSR if and only if VM is an SEV-ES guest Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 16/32] KVM: SVM: Drop "always" flag from list of possible passthrough MSRs Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 17/32] KVM: x86: Move definition of X2APIC_MSR() to lapic.h Sean Christopherson
2025-06-11 2:29 ` Mi, Dapeng
2025-06-10 22:57 ` Sean Christopherson [this message]
2025-06-11 6:52 ` [PATCH v2 18/32] KVM: VMX: Manually recalc all MSR intercepts on userspace MSR filter change Binbin Wu
2025-06-10 22:57 ` [PATCH v2 19/32] KVM: SVM: " Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 20/32] KVM: x86: Rename msr_filter_changed() => recalc_msr_intercepts() Sean Christopherson
2025-06-11 7:09 ` Binbin Wu
2025-06-10 22:57 ` [PATCH v2 21/32] KVM: SVM: Rename init_vmcb_after_set_cpuid() to make it intercepts specific Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 22/32] KVM: SVM: Fold svm_vcpu_init_msrpm() into its sole caller Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 23/32] KVM: SVM: Merge "after set CPUID" intercept recalc helpers Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 24/32] KVM: SVM: Drop explicit check on MSRPM offset when emulating SEV-ES accesses Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 25/32] KVM: SVM: Move svm_msrpm_offset() to nested.c Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 26/32] KVM: SVM: Store MSRPM pointer as "void *" instead of "u32 *" Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 27/32] KVM: nSVM: Access MSRPM in 4-byte chunks only for merging L0 and L1 bitmaps Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 28/32] KVM: SVM: Return -EINVAL instead of MSR_INVALID to signal out-of-range MSR Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 29/32] KVM: nSVM: Merge MSRPM in 64-bit chunks on 64-bit kernels Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 30/32] KVM: SVM: Add a helper to allocate and initialize permissions bitmaps Sean Christopherson
2025-06-10 22:57 ` [PATCH v2 31/32] KVM: x86: Simplify userspace filter logic when disabling MSR interception Sean Christopherson
2025-06-11 2:35 ` Mi, Dapeng
2025-06-10 22:57 ` [PATCH v2 32/32] KVM: selftests: Verify KVM disable interception (for userspace) on filter change Sean Christopherson
2025-06-24 19:38 ` [PATCH v2 00/32] KVM: x86: Clean up MSR interception code Sean Christopherson
2025-06-25 12:03 ` Manali Shukla
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250610225737.156318-19-seanjc@google.com \
--to=seanjc@google.com \
--cc=Manali.Shukla@amd.com \
--cc=bp@alien8.de \
--cc=chao.gao@intel.com \
--cc=dapeng1.mi@linux.intel.com \
--cc=francescolavra.fl@gmail.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=pbonzini@redhat.com \
--cc=xin@zytor.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox