[PATCH v2 01/13] KVM: SVM: Switch svm_copy_lbrs() to a macro
From: Yosry Ahmed @ 2025-11-10 22:29 UTC
To: Sean Christopherson
Cc: Paolo Bonzini, Jim Mattson, kvm, linux-kernel, Yosry Ahmed,
stable
In preparation for using svm_copy_lbrs() with 'struct vmcb_save_area'
without a containing 'struct vmcb', and later even 'struct
vmcb_save_area_cached', make it a macro. Pull the call to
vmcb_mark_dirty() out to the callers.
Macros are generally dispreferred to functions, mainly because they
sacrifice type safety. In this case, however, a simple macro that
copies a few fields is better than copy-pasting the same 5 lines of
code in different places.
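
As a userspace illustration of this trade-off (hypothetical struct and
macro names, not the kernel's), a field-wise macro can serve two
unrelated struct types that share member names, which a single function
prototype cannot:

	#include <stdio.h>

	/* Two unrelated structs that happen to share LBR field names. */
	struct save_area        { unsigned long dbgctl, br_from, br_to; };
	struct save_area_cached { unsigned long dbgctl, br_from, br_to; };

	/* Works for any pair of pointers exposing these members. */
	#define copy_lbrs(to, from)                     \
	({                                              \
		(to)->dbgctl  = (from)->dbgctl;         \
		(to)->br_from = (from)->br_from;        \
		(to)->br_to   = (from)->br_to;          \
	})

	int main(void)
	{
		struct save_area full = { .dbgctl = 1, .br_from = 2, .br_to = 3 };
		struct save_area_cached cached = { 0 };

		/* A function taking 'struct save_area *' could not accept &cached. */
		copy_lbrs(&cached, &full);
		printf("%lu %lu %lu\n", cached.dbgctl, cached.br_from, cached.br_to);
		return 0;
	}

The cost is that a misspelled field name is only diagnosed at each
expansion site rather than once at a function definition.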
On the bright side, pulling the vmcb_mark_dirty() calls out to the
callers makes it clear that in one case, vmcb_mark_dirty() was being
called on VMCB12. Clearing arbitrary clean bits is not architecturally
defined behavior for the CPU, and the call is not needed, so drop it.

Technically, this fixes the non-architectural behavior of marking
VMCB12 dirty (i.e. clearing its clean bits).
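
For reference, a minimal sketch of what vmcb_mark_dirty() does, based
on its current definition in svm.h; clearing a clean bit tells the CPU
that the corresponding VMCB area may have changed and must be re-read:

	static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
	{
		vmcb->control.clean &= ~(1 << bit);
	}

VMCB12 lives in guest memory and its clean bits belong to L1, so there
is no reason for KVM to clear them on L1's behalf.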
Fixes: d20c796ca370 ("KVM: x86: nSVM: implement nested LBR virtualization")
Cc: stable@vger.kernel.org
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
---
arch/x86/kvm/svm/nested.c | 16 ++++++++++------
arch/x86/kvm/svm/svm.c | 11 -----------
arch/x86/kvm/svm/svm.h | 10 +++++++++-
3 files changed, 19 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index da6e80b3ac353..a37bd5c1f36fa 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -675,10 +675,12 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
* Reserved bits of DEBUGCTL are ignored. Be consistent with
* svm_set_msr's definition of reserved bits.
*/
- svm_copy_lbrs(vmcb02, vmcb12);
+ svm_copy_lbrs(&vmcb02->save, &vmcb12->save);
+ vmcb_mark_dirty(vmcb02, VMCB_LBR);
vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
} else {
- svm_copy_lbrs(vmcb02, vmcb01);
+ svm_copy_lbrs(&vmcb02->save, &vmcb01->save);
+ vmcb_mark_dirty(vmcb02, VMCB_LBR);
}
svm_update_lbrv(&svm->vcpu);
}
@@ -1184,10 +1186,12 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
if (unlikely(guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
- (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK)))
- svm_copy_lbrs(vmcb12, vmcb02);
- else
- svm_copy_lbrs(vmcb01, vmcb02);
+ (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
+ svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
+ } else {
+ svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
+ vmcb_mark_dirty(vmcb01, VMCB_LBR);
+ }
svm_update_lbrv(vcpu);
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 10c21e4c5406f..711276e8ee84f 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -795,17 +795,6 @@ static void svm_recalc_msr_intercepts(struct kvm_vcpu *vcpu)
*/
}
-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
-{
- to_vmcb->save.dbgctl = from_vmcb->save.dbgctl;
- to_vmcb->save.br_from = from_vmcb->save.br_from;
- to_vmcb->save.br_to = from_vmcb->save.br_to;
- to_vmcb->save.last_excp_from = from_vmcb->save.last_excp_from;
- to_vmcb->save.last_excp_to = from_vmcb->save.last_excp_to;
-
- vmcb_mark_dirty(to_vmcb, VMCB_LBR);
-}
-
static void __svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
to_svm(vcpu)->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index c856d8e0f95e7..f6fb70ddf7272 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -687,8 +687,16 @@ static inline void *svm_vcpu_alloc_msrpm(void)
return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT);
}
+#define svm_copy_lbrs(to, from) \
+({ \
+ (to)->dbgctl = (from)->dbgctl; \
+ (to)->br_from = (from)->br_from; \
+ (to)->br_to = (from)->br_to; \
+ (to)->last_excp_from = (from)->last_excp_from; \
+ (to)->last_excp_to = (from)->last_excp_to; \
+})
+
void svm_vcpu_free_msrpm(void *msrpm);
-void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
void svm_enable_lbrv(struct kvm_vcpu *vcpu);
void svm_update_lbrv(struct kvm_vcpu *vcpu);
--
2.51.2.1041.gc1ab5b90ca-goog

[PATCH v2 02/13] KVM: SVM: Add missing save/restore handling of LBR MSRs
From: Yosry Ahmed @ 2025-11-10 22:29 UTC
To: Sean Christopherson
Cc: Paolo Bonzini, Jim Mattson, kvm, linux-kernel, Yosry Ahmed,
stable
MSR_IA32_DEBUGCTLMSR and the LBR MSRs are currently not enumerated by
KVM_GET_MSR_INDEX_LIST, and the LBR MSRs cannot be set with
KVM_SET_MSRS, so save/restore of these MSRs is completely broken.

Fix it by adding the MSRs to msrs_to_save_base, and by allowing writes
to the LBR MSRs from userspace only (they are read-only from the
guest's perspective). Additionally, to correctly restore L1's LBRs
while L2 is running, make sure the LBRs are copied from the captured
VMCB01 save area in svm_copy_vmrun_state().
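
For illustration, a minimal userspace sketch of the save/restore flow
this patch fixes (error handling and vCPU setup omitted; MSR indices
as defined in msr-index.h):

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	#define MSR_IA32_DEBUGCTLMSR		0x000001d9
	#define MSR_IA32_LASTBRANCHFROMIP	0x000001db
	#define MSR_IA32_LASTBRANCHTOIP		0x000001dc
	#define MSR_IA32_LASTINTFROMIP		0x000001dd
	#define MSR_IA32_LASTINTTOIP		0x000001de

	static const __u32 lbr_msrs[] = {
		MSR_IA32_DEBUGCTLMSR,
		MSR_IA32_LASTBRANCHFROMIP, MSR_IA32_LASTBRANCHTOIP,
		MSR_IA32_LASTINTFROMIP, MSR_IA32_LASTINTTOIP,
	};

	/* Save the LBR state from one vCPU and restore it on another. */
	static void copy_lbr_state(int src_vcpu_fd, int dst_vcpu_fd)
	{
		struct {
			struct kvm_msrs hdr;
			struct kvm_msr_entry entries[5];
		} msrs = { .hdr.nmsrs = 5 };

		for (int i = 0; i < 5; i++)
			msrs.entries[i].index = lbr_msrs[i];

		ioctl(src_vcpu_fd, KVM_GET_MSRS, &msrs);
		/* Without this patch, KVM_SET_MSRS rejects the LBR MSRs. */
		ioctl(dst_vcpu_fd, KVM_SET_MSRS, &msrs);
	}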
Fixes: 24e09cbf480a ("KVM: SVM: enable LBR virtualization")
Cc: stable@vger.kernel.org
Reported-by: Jim Mattson <jmattson@google.com>
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
---
arch/x86/kvm/svm/nested.c | 3 +++
arch/x86/kvm/svm/svm.c | 20 ++++++++++++++++++++
arch/x86/kvm/x86.c | 3 +++
3 files changed, 26 insertions(+)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index a37bd5c1f36fa..74211c5c68026 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1055,6 +1055,9 @@ void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
to_save->isst_addr = from_save->isst_addr;
to_save->ssp = from_save->ssp;
}
+
+ if (lbrv)
+ svm_copy_lbrs(to_save, from_save);
}
void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 711276e8ee84f..af0e9c26527e3 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2983,6 +2983,26 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
vmcb_mark_dirty(svm->vmcb, VMCB_LBR);
svm_update_lbrv(vcpu);
break;
+ case MSR_IA32_LASTBRANCHFROMIP:
+ if (!msr->host_initiated)
+ return 1;
+ svm->vmcb->save.br_from = data;
+ break;
+ case MSR_IA32_LASTBRANCHTOIP:
+ if (!msr->host_initiated)
+ return 1;
+ svm->vmcb->save.br_to = data;
+ break;
+ case MSR_IA32_LASTINTFROMIP:
+ if (!msr->host_initiated)
+ return 1;
+ svm->vmcb->save.last_excp_from = data;
+ break;
+ case MSR_IA32_LASTINTTOIP:
+ if (!msr->host_initiated)
+ return 1;
+ svm->vmcb->save.last_excp_to = data;
+ break;
case MSR_VM_HSAVE_PA:
/*
* Old kernels did not validate the value written to
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c9c2aa6f4705e..9cb824f9cf644 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -348,6 +348,9 @@ static const u32 msrs_to_save_base[] = {
MSR_IA32_U_CET, MSR_IA32_S_CET,
MSR_IA32_PL0_SSP, MSR_IA32_PL1_SSP, MSR_IA32_PL2_SSP,
MSR_IA32_PL3_SSP, MSR_IA32_INT_SSP_TAB,
+ MSR_IA32_DEBUGCTLMSR,
+ MSR_IA32_LASTBRANCHFROMIP, MSR_IA32_LASTBRANCHTOIP,
+ MSR_IA32_LASTINTFROMIP, MSR_IA32_LASTINTTOIP,
};
static const u32 msrs_to_save_pmu[] = {
--
2.51.2.1041.gc1ab5b90ca-goog

[PATCH v2 04/13] KVM: nSVM: Fix consistency checks for NP_ENABLE
From: Yosry Ahmed @ 2025-11-10 22:29 UTC
To: Sean Christopherson
Cc: Paolo Bonzini, Jim Mattson, kvm, linux-kernel, Yosry Ahmed,
stable
KVM currently fails a nested VMRUN and injects VMEXIT_INVALID (aka
SVM_EXIT_ERR) if L1 sets NP_ENABLE and the host does not support NPT.

At first glance, it seems like the check should actually be for
guest_cpu_cap_has(X86_FEATURE_NPT) instead, as it is possible for the
host to support NPT but for the guest CPUID to not advertise it.
However, the consistency check is not architectural to begin with. The
APM does not mention VMEXIT_INVALID if NP_ENABLE is set on a processor
that does not have X86_FEATURE_NPT. Hence, NP_ENABLE should be ignored
if X86_FEATURE_NPT is not available for L1. Apart from the consistency
check, this is currently the case because NP_ENABLE is actually copied
from VMCB01 to VMCB02, not from VMCB12.
On the other hand, the APM does mention two other consistency checks for
NP_ENABLE, both of which are missing (paraphrased):
In Volume #2, 15.25.3 (24593—Rev. 3.42—March 2024):
If VMRUN is executed with hCR0.PG cleared to zero and NP_ENABLE set to
1, VMRUN terminates with #VMEXIT(VMEXIT_INVALID)
In Volume #2, 15.25.4 (24593—Rev. 3.42—March 2024):
When VMRUN is executed with nested paging enabled (NP_ENABLE = 1), the
following conditions are considered illegal state combinations, in
addition to those mentioned in “Canonicalization and Consistency
Checks”:
• Any MBZ bit of nCR3 is set.
• Any G_PAT.PA field has an unsupported type encoding or any
reserved field in G_PAT has a nonzero value.
Replace the existing consistency check with consistency checks on
hCR0.PG and nCR3. Only perform the consistency checks if L1 has
X86_FEATURE_NPT and NP_ENABLE is set in VMCB12. The G_PAT consistency
check will be addressed separately.
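
For context, the nCR3 check relies on kvm_vcpu_is_legal_gpa(), whose
semantics are roughly the following sketch (not the exact KVM
implementation, which tracks reserved GPA bits): an address is legal
iff no bits at or above the guest's MAXPHYADDR are set.

	/* Sketch only; hypothetical helper name. */
	static inline bool gpa_is_legal(struct kvm_vcpu *vcpu, gpa_t gpa)
	{
		return !(gpa >> cpuid_maxphyaddr(vcpu));
	}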
As it is now possible for an L1 to run L2 with NP_ENABLE set but
ignored, also check that L1 has X86_FEATURE_NPT in nested_npt_enabled().
Pass L1's CR0 to __nested_vmcb_check_controls(). In
nested_vmcb_check_controls(), L1's CR0 is available through
kvm_read_cr0(), as vcpu->arch.cr0 is not updated to L2's CR0 until later
through nested_vmcb02_prepare_save() -> svm_set_cr0().
In svm_set_nested_state(), L1's CR0 is available in the captured save
area, as svm_get_nested_state() captures L1's save area when running L2,
and L1's CR0 is stashed in VMCB01 on nested VMRUN (in
nested_svm_vmrun()).
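
For context, a simplified sketch of why kvm_read_cr0() returns L1's
CR0 at that point (the register-cache indirection is elided; not the
exact definition):

	/* vcpu->arch.cr0 still holds L1's CR0 until
	 * nested_vmcb02_prepare_save() -> svm_set_cr0() runs. */
	static inline unsigned long read_cr0_sketch(struct kvm_vcpu *vcpu)
	{
		return vcpu->arch.cr0;
	}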
Fixes: 4b16184c1cca ("KVM: SVM: Initialize Nested Nested MMU context on VMRUN")
Cc: stable@vger.kernel.org
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
---
arch/x86/kvm/svm/nested.c | 21 ++++++++++++++++-----
arch/x86/kvm/svm/svm.h | 3 ++-
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 74211c5c68026..87bcc5eff96e8 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -325,7 +325,8 @@ static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
}
static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
- struct vmcb_ctrl_area_cached *control)
+ struct vmcb_ctrl_area_cached *control,
+ unsigned long l1_cr0)
{
if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
return false;
@@ -333,8 +334,12 @@ static bool __nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
if (CC(control->asid == 0))
return false;
- if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
- return false;
+ if (nested_npt_enabled(to_svm(vcpu))) {
+ if (CC(!kvm_vcpu_is_legal_gpa(vcpu, control->nested_cr3)))
+ return false;
+ if (CC(!(l1_cr0 & X86_CR0_PG)))
+ return false;
+ }
if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
MSRPM_SIZE)))
@@ -400,7 +405,12 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
struct vcpu_svm *svm = to_svm(vcpu);
struct vmcb_ctrl_area_cached *ctl = &svm->nested.ctl;
- return __nested_vmcb_check_controls(vcpu, ctl);
+ /*
+ * Make sure we did not enter guest mode yet, in which case
+ * kvm_read_cr0() could return L2's CR0.
+ */
+ WARN_ON_ONCE(is_guest_mode(vcpu));
+ return __nested_vmcb_check_controls(vcpu, ctl, kvm_read_cr0(vcpu));
}
static
@@ -1831,7 +1841,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
ret = -EINVAL;
__nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl);
- if (!__nested_vmcb_check_controls(vcpu, &ctl_cached))
+ /* 'save' contains L1 state saved from before VMRUN */
+ if (!__nested_vmcb_check_controls(vcpu, &ctl_cached, save->cr0))
goto out_free;
/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index f6fb70ddf7272..3e805a43ffcdb 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -552,7 +552,8 @@ static inline bool gif_set(struct vcpu_svm *svm)
static inline bool nested_npt_enabled(struct vcpu_svm *svm)
{
- return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
+ return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_NPT) &&
+ svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE;
}
static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
--
2.51.2.1041.gc1ab5b90ca-goog