public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Josh Poimboeuf <jpoimboe@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Borislav Petkov <bp@suse.de>, Sasha Levin <sashal@kernel.org>,
	tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, x86@kernel.org, seanjc@google.com,
	pbonzini@redhat.com, pawan.kumar.gupta@linux.intel.com,
	sblbir@amazon.com, kim.phillips@amd.com,
	alexandre.chartre@oracle.com, kvm@vger.kernel.org
Subject: [PATCH AUTOSEL 5.15 22/42] KVM: VMX: Prevent guest RSB poisoning attacks with eIBRS
Date: Tue, 19 Jul 2022 21:13:30 -0400	[thread overview]
Message-ID: <20220720011350.1024134-22-sashal@kernel.org> (raw)
In-Reply-To: <20220720011350.1024134-1-sashal@kernel.org>

From: Josh Poimboeuf <jpoimboe@kernel.org>

[ Upstream commit fc02735b14fff8c6678b521d324ade27b1a3d4cf ]

On eIBRS systems, the returns in the vmexit return path from
__vmx_vcpu_run() to vmx_vcpu_run() are exposed to RSB poisoning attacks.

Fix that by moving the post-vmexit spec_ctrl handling to immediately
after the vmexit.

Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 arch/x86/include/asm/nospec-branch.h |  1 +
 arch/x86/kernel/cpu/bugs.c           |  4 +++
 arch/x86/kvm/vmx/run_flags.h         |  1 +
 arch/x86/kvm/vmx/vmenter.S           | 49 +++++++++++++++++++++-------
 arch/x86/kvm/vmx/vmx.c               | 48 +++++++++++++++------------
 arch/x86/kvm/vmx/vmx.h               |  1 +
 6 files changed, 73 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 6189609bede3..4ef9cc93d584 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -220,6 +220,7 @@ static inline void indirect_branch_prediction_barrier(void)
 
 /* The Intel SPEC CTRL MSR base value cache */
 extern u64 x86_spec_ctrl_base;
+extern u64 x86_spec_ctrl_current;
 extern void write_spec_ctrl_current(u64 val, bool force);
 
 /*
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c59db48472dc..13a2ee4737f6 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -199,6 +199,10 @@ void __init check_bugs(void)
 #endif
 }
 
+/*
+ * NOTE: For VMX, this function is not called in the vmexit path.
+ * It uses vmx_spec_ctrl_restore_host() instead.
+ */
 void
 x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
 {
diff --git a/arch/x86/kvm/vmx/run_flags.h b/arch/x86/kvm/vmx/run_flags.h
index 57f4c664ea9c..edc3f16cc189 100644
--- a/arch/x86/kvm/vmx/run_flags.h
+++ b/arch/x86/kvm/vmx/run_flags.h
@@ -3,5 +3,6 @@
 #define __KVM_X86_VMX_RUN_FLAGS_H
 
 #define VMX_RUN_VMRESUME	(1 << 0)
+#define VMX_RUN_SAVE_SPEC_CTRL	(1 << 1)
 
 #endif /* __KVM_X86_VMX_RUN_FLAGS_H */
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index ddc3bf85db33..8641ea74a307 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -33,9 +33,10 @@
 
 /**
  * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
- * @vmx:	struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
+ * @vmx:	struct vcpu_vmx *
  * @regs:	unsigned long * (to guest registers)
- * @flags:	VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
+ * @flags:	VMX_RUN_VMRESUME:	use VMRESUME instead of VMLAUNCH
+ *		VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
  *
  * Returns:
  *	0 on VM-Exit, 1 on VM-Fail
@@ -54,6 +55,12 @@ SYM_FUNC_START(__vmx_vcpu_run)
 #endif
 	push %_ASM_BX
 
+	/* Save @vmx for SPEC_CTRL handling */
+	push %_ASM_ARG1
+
+	/* Save @flags for SPEC_CTRL handling */
+	push %_ASM_ARG3
+
 	/*
 	 * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
 	 * @regs is needed after VM-Exit to save the guest's register values.
@@ -149,25 +156,23 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 	mov %r15, VCPU_R15(%_ASM_AX)
 #endif
 
-	/* IMPORTANT: RSB must be stuffed before the first return. */
-	FILL_RETURN_BUFFER %_ASM_BX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
-
-	/* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
-	xor %eax, %eax
+	/* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
+	xor %ebx, %ebx
 
 .Lclear_regs:
 	/*
-	 * Clear all general purpose registers except RSP and RAX to prevent
+	 * Clear all general purpose registers except RSP and RBX to prevent
 	 * speculative use of the guest's values, even those that are reloaded
 	 * via the stack.  In theory, an L1 cache miss when restoring registers
 	 * could lead to speculative execution with the guest's values.
 	 * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
 	 * free.  RSP and RAX are exempt as RSP is restored by hardware during
-	 * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
+	 * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
+	 * value.
 	 */
+	xor %eax, %eax
 	xor %ecx, %ecx
 	xor %edx, %edx
-	xor %ebx, %ebx
 	xor %ebp, %ebp
 	xor %esi, %esi
 	xor %edi, %edi
@@ -185,6 +190,28 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 	/* "POP" @regs. */
 	add $WORD_SIZE, %_ASM_SP
 
+	/*
+	 * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
+	 * the first unbalanced RET after vmexit!
+	 *
+	 * For retpoline, RSB filling is needed to prevent poisoned RSB entries
+	 * and (in some cases) RSB underflow.
+	 *
+	 * eIBRS has its own protection against poisoned RSB, so it doesn't
+	 * need the RSB filling sequence.  But it does need to be enabled
+	 * before the first unbalanced RET.
+	 */
+
+	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
+
+	pop %_ASM_ARG2	/* @flags */
+	pop %_ASM_ARG1	/* @vmx */
+
+	call vmx_spec_ctrl_restore_host
+
+	/* Put return value in AX */
+	mov %_ASM_BX, %_ASM_AX
+
 	pop %_ASM_BX
 #ifdef CONFIG_X86_64
 	pop %r12
@@ -204,7 +231,7 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 	ud2
 .Lvmfail:
 	/* VM-Fail: set return value to 1 */
-	mov $1, %eax
+	mov $1, %_ASM_BX
 	jmp .Lclear_regs
 
 SYM_FUNC_END(__vmx_vcpu_run)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index bac08c265ddf..f668df996668 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -842,6 +842,14 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
 	if (vmx->loaded_vmcs->launched)
 		flags |= VMX_RUN_VMRESUME;
 
+	/*
+	 * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
+	 * to change it directly without causing a vmexit.  In that case read
+	 * it after vmexit and store it in vmx->spec_ctrl.
+	 */
+	if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
+		flags |= VMX_RUN_SAVE_SPEC_CTRL;
+
 	return flags;
 }
 
@@ -6664,6 +6672,26 @@ void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
 	}
 }
 
+void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
+					unsigned int flags)
+{
+	u64 hostval = this_cpu_read(x86_spec_ctrl_current);
+
+	if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
+		return;
+
+	if (flags & VMX_RUN_SAVE_SPEC_CTRL)
+		vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
+
+	/*
+	 * If the guest/host SPEC_CTRL values differ, restore the host value.
+	 */
+	if (vmx->spec_ctrl != hostval)
+		native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
+
+	barrier_nospec();
+}
+
 static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 {
 	switch (to_vmx(vcpu)->exit_reason.basic) {
@@ -6799,26 +6827,6 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	/* The actual VMENTER/EXIT is in the .noinstr.text section. */
 	vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
 
-	/*
-	 * We do not use IBRS in the kernel. If this vCPU has used the
-	 * SPEC_CTRL MSR it may have left it on; save the value and
-	 * turn it off. This is much more efficient than blindly adding
-	 * it to the atomic save/restore list. Especially as the former
-	 * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
-	 *
-	 * For non-nested case:
-	 * If the L01 MSR bitmap does not intercept the MSR, then we need to
-	 * save it.
-	 *
-	 * For nested case:
-	 * If the L02 MSR bitmap does not intercept the MSR, then we need to
-	 * save it.
-	 */
-	if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
-		vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
-
-	x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
-
 	/* All fields are clean at this point */
 	if (static_branch_unlikely(&enable_evmcs)) {
 		current_evmcs->hv_clean_fields |=
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 35476115cbd0..a8b8150252bb 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -383,6 +383,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
 void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
 void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
+void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
 unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
 bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
 		    unsigned int flags);
-- 
2.35.1


  parent reply	other threads:[~2022-07-20  1:20 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-20  1:13 [PATCH AUTOSEL 5.15 01/42] Revert "evm: Fix memleak in init_desc" Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 02/42] ARM: rockchip: Add missing of_node_put() in rockchip_suspend_init() Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 03/42] x86/kvm/vmx: Make noinstr clean Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 04/42] x86/cpufeatures: Move RETPOLINE flags to word 11 Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 05/42] x86/retpoline: Swizzle retpoline thunk Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 06/42] x86/vsyscall_emu/64: Don't use RET in vsyscall emulation Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 07/42] x86/sev: Avoid using __x86_return_thunk Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 08/42] objtool: Treat .text.__x86.* as noinstr Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 09/42] x86/bugs: Report AMD retbleed vulnerability Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 10/42] x86/bugs: Add AMD retbleed= boot parameter Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 11/42] x86/bugs: Enable STIBP for JMP2RET Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 12/42] x86/bugs: Keep a per-CPU IA32_SPEC_CTRL value Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 13/42] x86/bugs: Optimize SPEC_CTRL MSR writes Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 14/42] x86/bugs: Split spectre_v2_select_mitigation() and spectre_v2_user_select_mitigation() Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 15/42] x86/bugs: Report Intel retbleed vulnerability Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 16/42] x86/xen: Rename SYS* entry points Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 17/42] x86/bugs: Do IBPB fallback check only once Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 18/42] x86/cpu/amd: Add Spectral Chicken Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 19/42] x86/speculation: Fix RSB filling with CONFIG_RETPOLINE=n Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 20/42] KVM: VMX: Flatten __vmx_vcpu_run() Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 21/42] KVM: VMX: Convert launched argument to flags Sasha Levin
2022-07-20  1:13 ` Sasha Levin [this message]
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 23/42] wifi: mac80211: check skb_shared in ieee80211_8023_xmit() Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 24/42] wifi: mac80211: do not wake queues on a vif that is being stopped Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 25/42] wifi: cfg80211: Allow P2P client interface to indicate port authorization Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 26/42] drm: panel-orientation-quirks: Add quirk for the Lenovo Yoga Tablet 2 830 Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 27/42] nilfs2: fix incorrect masking of permission flags for symlinks Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 28/42] ASoC: ti: omap-mcbsp: duplicate sysfs error Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 29/42] ASoC: tlv320adcx140: Fix tx_mask check Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 30/42] ASoC: wm5102: Fix event generation for output compensation Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 31/42] ASoC: wm8998: Fix event generation for input mux Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 32/42] ASoC: cs47l92: Fix event generation for OUT1 demux Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 33/42] ASoC: arizona: Update arizona_aif_cfg_changed to use RX_BCLK_RATE Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 34/42] scsi: target: Fix WRITE_SAME No Data Buffer crash Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 35/42] x86/kexec: Disable RET on kexec Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 36/42] platform/x86: asus-wmi: Add key mappings Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 37/42] platform/x86: intel_atomisp2_led: Also turn off the always-on camera LED on the Asus T100TAF Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 38/42] scsi: ufs: core: Fix missing clk change notification on host reset Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 39/42] scsi: pm80xx: Fix 'Unknown' max/min linkrate Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 40/42] scsi: pm80xx: Set stopped phy's linkrate to Disabled Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 41/42] KVM: nVMX: Always enable TSC scaling for L2 when it was enabled for L1 Sasha Levin
2022-07-20  1:13 ` [PATCH AUTOSEL 5.15 42/42] ubsan: disable UBSAN_DIV_ZERO for clang Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220720011350.1024134-22-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=alexandre.chartre@oracle.com \
    --cc=bp@alien8.de \
    --cc=bp@suse.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=jpoimboe@kernel.org \
    --cc=kim.phillips@amd.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pawan.kumar.gupta@linux.intel.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=sblbir@amazon.com \
    --cc=seanjc@google.com \
    --cc=stable@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox