All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yosry Ahmed <yosry@kernel.org>
To: Sean Christopherson <seanjc@google.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
	Yosry Ahmed <yosry@kernel.org>
Subject: [PATCH v7 20/26] KVM: nSVM: Cache all used fields from VMCB12
Date: Tue,  3 Mar 2026 00:34:14 +0000	[thread overview]
Message-ID: <20260303003421.2185681-21-yosry@kernel.org> (raw)
In-Reply-To: <20260303003421.2185681-1-yosry@kernel.org>

Currently, most fields used from VMCB12 are cached in
svm->nested.{ctl/save}. This is mainly to avoid TOC-TOU bugs. However,
for the save area, only the fields used in the consistency checks (i.e.
nested_vmcb_check_save()) were being cached. Other fields are read
directly from guest memory in nested_vmcb02_prepare_save().

While probably benign, this still makes it possible for TOC-TOU bugs to
happen. For example, RAX, RSP, and RIP are read twice, once to store in
VMCB02, and once to store in vcpu->arch.regs. It is possible for the
guest to modify the value between both reads, potentially causing nasty
bugs.

Harden against such bugs by caching everything in svm->nested.save.
Cache all the needed fields, and keep all accesses to the VMCB12
strictly in nested_svm_vmrun() for caching and early error injection.
Following changes will further limit the access to the VMCB12 in the
nested VMRUN path.

Introduce vmcb12_is_dirty() to use with the cached control fields
instead of vmcb_is_dirty(), similar to vmcb12_is_intercept().

Opportunistically order the copies in __nested_copy_vmcb_save_to_cache()
by the order in which the fields are defined in struct vmcb_save_area.

Signed-off-by: Yosry Ahmed <yosry@kernel.org>
---
 arch/x86/kvm/svm/nested.c | 118 ++++++++++++++++++++++----------------
 arch/x86/kvm/svm/svm.c    |   2 +-
 arch/x86/kvm/svm/svm.h    |  27 ++++++++-
 3 files changed, 94 insertions(+), 53 deletions(-)

diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 5994e309831d0..f89040a467d4a 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -518,11 +518,11 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
 	to->asid           = from->asid;
 	to->msrpm_base_pa &= ~0x0fffULL;
 	to->iopm_base_pa  &= ~0x0fffULL;
+	to->clean = from->clean;
 
 #ifdef CONFIG_KVM_HYPERV
 	/* Hyper-V extensions (Enlightened VMCB) */
 	if (kvm_hv_hypercall_enabled(vcpu)) {
-		to->clean = from->clean;
 		memcpy(&to->hv_enlightenments, &from->hv_enlightenments,
 		       sizeof(to->hv_enlightenments));
 	}
@@ -538,19 +538,34 @@ void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
 static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to,
 					     struct vmcb_save_area *from)
 {
-	/*
-	 * Copy only fields that are validated, as we need them
-	 * to avoid TOC/TOU races.
-	 */
+	to->es = from->es;
 	to->cs = from->cs;
+	to->ss = from->ss;
+	to->ds = from->ds;
+	to->gdtr = from->gdtr;
+	to->idtr = from->idtr;
+
+	to->cpl = from->cpl;
 
 	to->efer = from->efer;
-	to->cr0 = from->cr0;
-	to->cr3 = from->cr3;
 	to->cr4 = from->cr4;
-
-	to->dr6 = from->dr6;
+	to->cr3 = from->cr3;
+	to->cr0 = from->cr0;
 	to->dr7 = from->dr7;
+	to->dr6 = from->dr6;
+
+	to->rflags = from->rflags;
+	to->rip = from->rip;
+	to->rsp = from->rsp;
+
+	to->s_cet = from->s_cet;
+	to->ssp = from->ssp;
+	to->isst_addr = from->isst_addr;
+
+	to->rax = from->rax;
+	to->cr2 = from->cr2;
+
+	svm_copy_lbrs(to, from);
 }
 
 void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
@@ -692,8 +707,10 @@ static bool nested_vmcb12_has_lbrv(struct kvm_vcpu *vcpu)
 		(to_svm(vcpu)->nested.ctl.misc_ctl2 & SVM_MISC2_ENABLE_V_LBR);
 }
 
-static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
+static void nested_vmcb02_prepare_save(struct vcpu_svm *svm)
 {
+	struct vmcb_ctrl_area_cached *control = &svm->nested.ctl;
+	struct vmcb_save_area_cached *save = &svm->nested.save;
 	bool new_vmcb12 = false;
 	struct vmcb *vmcb01 = svm->vmcb01.ptr;
 	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
@@ -709,48 +726,48 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 		svm->nested.force_msr_bitmap_recalc = true;
 	}
 
-	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
-		vmcb02->save.es = vmcb12->save.es;
-		vmcb02->save.cs = vmcb12->save.cs;
-		vmcb02->save.ss = vmcb12->save.ss;
-		vmcb02->save.ds = vmcb12->save.ds;
-		vmcb02->save.cpl = vmcb12->save.cpl;
+	if (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_SEG))) {
+		vmcb02->save.es = save->es;
+		vmcb02->save.cs = save->cs;
+		vmcb02->save.ss = save->ss;
+		vmcb02->save.ds = save->ds;
+		vmcb02->save.cpl = save->cpl;
 		vmcb_mark_dirty(vmcb02, VMCB_SEG);
 	}
 
-	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
-		vmcb02->save.gdtr = vmcb12->save.gdtr;
-		vmcb02->save.idtr = vmcb12->save.idtr;
+	if (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_DT))) {
+		vmcb02->save.gdtr = save->gdtr;
+		vmcb02->save.idtr = save->idtr;
 		vmcb_mark_dirty(vmcb02, VMCB_DT);
 	}
 
 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
-	    (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_CET)))) {
-		vmcb02->save.s_cet  = vmcb12->save.s_cet;
-		vmcb02->save.isst_addr = vmcb12->save.isst_addr;
-		vmcb02->save.ssp = vmcb12->save.ssp;
+	    (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_CET)))) {
+		vmcb02->save.s_cet  = save->s_cet;
+		vmcb02->save.isst_addr = save->isst_addr;
+		vmcb02->save.ssp = save->ssp;
 		vmcb_mark_dirty(vmcb02, VMCB_CET);
 	}
 
-	kvm_set_rflags(vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
+	kvm_set_rflags(vcpu, save->rflags | X86_EFLAGS_FIXED);
 
 	svm_set_efer(vcpu, svm->nested.save.efer);
 
 	svm_set_cr0(vcpu, svm->nested.save.cr0);
 	svm_set_cr4(vcpu, svm->nested.save.cr4);
 
-	svm->vcpu.arch.cr2 = vmcb12->save.cr2;
+	svm->vcpu.arch.cr2 = save->cr2;
 
-	kvm_rax_write(vcpu, vmcb12->save.rax);
-	kvm_rsp_write(vcpu, vmcb12->save.rsp);
-	kvm_rip_write(vcpu, vmcb12->save.rip);
+	kvm_rax_write(vcpu, save->rax);
+	kvm_rsp_write(vcpu, save->rsp);
+	kvm_rip_write(vcpu, save->rip);
 
 	/* In case we don't even reach vcpu_run, the fields are not updated */
-	vmcb02->save.rax = vmcb12->save.rax;
-	vmcb02->save.rsp = vmcb12->save.rsp;
-	vmcb02->save.rip = vmcb12->save.rip;
+	vmcb02->save.rax = save->rax;
+	vmcb02->save.rsp = save->rsp;
+	vmcb02->save.rip = save->rip;
 
-	if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
+	if (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_DR))) {
 		vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
 		svm->vcpu.arch.dr6  = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
 		vmcb_mark_dirty(vmcb02, VMCB_DR);
@@ -761,7 +778,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
 		 * Reserved bits of DEBUGCTL are ignored.  Be consistent with
 		 * svm_set_msr's definition of reserved bits.
 		 */
-		svm_copy_lbrs(&vmcb02->save, &vmcb12->save);
+		svm_copy_lbrs(&vmcb02->save, save);
 		vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
 	} else {
 		svm_copy_lbrs(&vmcb02->save, &vmcb01->save);
@@ -984,28 +1001,29 @@ static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to
 	to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
 }
 
-int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
-			 struct vmcb *vmcb12, bool from_vmrun)
+int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, bool from_vmrun)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
+	struct vmcb_ctrl_area_cached *control = &svm->nested.ctl;
+	struct vmcb_save_area_cached *save = &svm->nested.save;
 	int ret;
 
 	trace_kvm_nested_vmenter(svm->vmcb->save.rip,
 				 vmcb12_gpa,
-				 vmcb12->save.rip,
-				 vmcb12->control.int_ctl,
-				 vmcb12->control.event_inj,
-				 vmcb12->control.misc_ctl,
-				 vmcb12->control.nested_cr3,
-				 vmcb12->save.cr3,
+				 save->rip,
+				 control->int_ctl,
+				 control->event_inj,
+				 control->misc_ctl,
+				 control->nested_cr3,
+				 save->cr3,
 				 KVM_ISA_SVM);
 
-	trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
-				    vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
-				    vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
-				    vmcb12->control.intercepts[INTERCEPT_WORD3],
-				    vmcb12->control.intercepts[INTERCEPT_WORD4],
-				    vmcb12->control.intercepts[INTERCEPT_WORD5]);
+	trace_kvm_nested_intercepts(control->intercepts[INTERCEPT_CR] & 0xffff,
+				    control->intercepts[INTERCEPT_CR] >> 16,
+				    control->intercepts[INTERCEPT_EXCEPTION],
+				    control->intercepts[INTERCEPT_WORD3],
+				    control->intercepts[INTERCEPT_WORD4],
+				    control->intercepts[INTERCEPT_WORD5]);
 
 
 	svm->nested.vmcb12_gpa = vmcb12_gpa;
@@ -1015,8 +1033,8 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
 	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
 
 	svm_switch_vmcb(svm, &svm->nested.vmcb02);
-	nested_vmcb02_prepare_control(svm, vmcb12->save.rip, vmcb12->save.cs.base);
-	nested_vmcb02_prepare_save(svm, vmcb12);
+	nested_vmcb02_prepare_control(svm, save->rip, save->cs.base);
+	nested_vmcb02_prepare_save(svm);
 
 	ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
 				  nested_npt_enabled(svm), from_vmrun);
@@ -1104,7 +1122,7 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 
 	svm->nested.nested_run_pending = 1;
 
-	if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
+	if (enter_svm_guest_mode(vcpu, vmcb12_gpa, true))
 		goto out_exit_err;
 
 	if (nested_svm_merge_msrpm(vcpu))
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 94e14badddfa2..19112ec48c0f7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4885,7 +4885,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
 	vmcb12 = map.hva;
 	nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
 	nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
-	ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false);
+	ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, false);
 
 	if (ret)
 		goto unmap_save;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 304328c33e960..388aaa5d63d29 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -140,13 +140,32 @@ struct kvm_vmcb_info {
 };
 
 struct vmcb_save_area_cached {
+	struct vmcb_seg es;
 	struct vmcb_seg cs;
+	struct vmcb_seg ss;
+	struct vmcb_seg ds;
+	struct vmcb_seg gdtr;
+	struct vmcb_seg idtr;
+	u8 cpl;
 	u64 efer;
 	u64 cr4;
 	u64 cr3;
 	u64 cr0;
 	u64 dr7;
 	u64 dr6;
+	u64 rflags;
+	u64 rip;
+	u64 rsp;
+	u64 s_cet;
+	u64 ssp;
+	u64 isst_addr;
+	u64 rax;
+	u64 cr2;
+	u64 dbgctl;
+	u64 br_from;
+	u64 br_to;
+	u64 last_excp_from;
+	u64 last_excp_to;
 };
 
 struct vmcb_ctrl_area_cached {
@@ -421,6 +440,11 @@ static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
         return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
 }
 
+static inline bool vmcb12_is_dirty(struct vmcb_ctrl_area_cached *control, int bit)
+{
+	return !test_bit(bit, (unsigned long *)&control->clean);
+}
+
 static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
 {
 	return container_of(vcpu, struct vcpu_svm, vcpu);
@@ -785,8 +809,7 @@ static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
 
 int __init nested_svm_init_msrpm_merge_offsets(void);
 
-int enter_svm_guest_mode(struct kvm_vcpu *vcpu,
-			 u64 vmcb_gpa, struct vmcb *vmcb12, bool from_vmrun);
+int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, bool from_vmrun);
 void svm_leave_nested(struct kvm_vcpu *vcpu);
 void svm_free_nested(struct vcpu_svm *svm);
 int svm_allocate_nested(struct vcpu_svm *svm);
-- 
2.53.0.473.g4a7958ca14-goog


  parent reply	other threads:[~2026-03-03  0:34 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-03  0:33 [PATCH v7 00/26] Nested SVM fixes, cleanups, and hardening Yosry Ahmed
2026-03-03  0:33 ` [PATCH v7 01/26] KVM: nSVM: Avoid clearing VMCB_LBR in vmcb12 Yosry Ahmed
2026-03-03  0:33 ` [PATCH v7 02/26] KVM: SVM: Switch svm_copy_lbrs() to a macro Yosry Ahmed
2026-03-03  0:33 ` [PATCH v7 03/26] KVM: SVM: Add missing save/restore handling of LBR MSRs Yosry Ahmed
2026-03-03 16:37   ` Sean Christopherson
2026-03-03 19:14     ` Yosry Ahmed
2026-03-04  0:44       ` Sean Christopherson
2026-03-04  0:48         ` Yosry Ahmed
2026-03-03  0:33 ` [PATCH v7 04/26] KVM: selftests: Add a test for LBR save/restore (ft. nested) Yosry Ahmed
2026-03-03  0:33 ` [PATCH v7 05/26] KVM: nSVM: Always inject a #GP if mapping VMCB12 fails on nested VMRUN Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 06/26] KVM: nSVM: Refactor checking LBRV enablement in vmcb12 into a helper Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 07/26] KVM: nSVM: Refactor writing vmcb12 on nested #VMEXIT as " Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 08/26] KVM: nSVM: Triple fault if mapping VMCB12 fails on nested #VMEXIT Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 09/26] KVM: nSVM: Triple fault if restore host CR3 " Yosry Ahmed
2026-03-03 16:49   ` Sean Christopherson
2026-03-03 19:15     ` Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 10/26] KVM: nSVM: Clear GIF on nested #VMEXIT(INVALID) Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 11/26] KVM: nSVM: Clear EVENTINJ fields in vmcb12 on nested #VMEXIT Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 12/26] KVM: nSVM: Clear tracking of L1->L2 NMI and soft IRQ " Yosry Ahmed
2026-03-03 16:50   ` Sean Christopherson
2026-03-03 19:15     ` Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 13/26] KVM: nSVM: Drop nested_vmcb_check_{save/control}() wrappers Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 14/26] KVM: nSVM: Drop the non-architectural consistency check for NP_ENABLE Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 15/26] KVM: nSVM: Add missing consistency check for nCR3 validity Yosry Ahmed
2026-03-03 16:56   ` Sean Christopherson
2026-03-03 19:17     ` Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 16/26] KVM: nSVM: Add missing consistency check for EFER, CR0, CR4, and CS Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 17/26] KVM: nSVM: Add missing consistency check for EVENTINJ Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 18/26] KVM: SVM: Rename vmcb->nested_ctl to vmcb->misc_ctl Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 19/26] KVM: SVM: Rename vmcb->virt_ext to vmcb->misc_ctl2 Yosry Ahmed
2026-03-03  0:34 ` Yosry Ahmed [this message]
2026-03-03  0:34 ` [PATCH v7 21/26] KVM: nSVM: Restrict mapping vmcb12 on nested VMRUN Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 22/26] KVM: nSVM: Use PAGE_MASK to drop lower bits of bitmap GPAs from vmcb12 Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 23/26] KVM: nSVM: Sanitize TLB_CONTROL field when copying " Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 24/26] KVM: nSVM: Sanitize INT/EVENTINJ fields " Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 25/26] KVM: nSVM: Only copy SVM_MISC_ENABLE_NP from VMCB01's misc_ctl Yosry Ahmed
2026-03-03  0:34 ` [PATCH v7 26/26] KVM: selftest: Add a selftest for VMRUN/#VMEXIT with unmappable vmcb12 Yosry Ahmed
2026-03-05 22:30   ` Jim Mattson
2026-03-05 22:52     ` Yosry Ahmed
2026-03-06  0:05       ` Jim Mattson
2026-03-06  0:40         ` Yosry Ahmed
2026-03-06  1:17           ` Jim Mattson
2026-03-06  1:39             ` Sean Christopherson
2026-03-06  1:46               ` Jim Mattson
2026-03-06 15:52               ` Yosry Ahmed
2026-03-06 17:54                 ` Yosry Ahmed
2026-03-06 22:15                   ` Jim Mattson
2026-03-06 22:35                     ` Yosry Ahmed
2026-03-06 16:09               ` Yosry Ahmed
2026-03-06 16:35                 ` Sean Christopherson
2026-03-06 17:25                   ` Yosry Ahmed
2026-03-05 17:08 ` [PATCH v7 00/26] Nested SVM fixes, cleanups, and hardening Sean Christopherson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260303003421.2185681-21-yosry@kernel.org \
    --to=yosry@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.