public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Xin Li <xin3.li@intel.com>
To: linux-kernel@vger.kernel.org, x86@kernel.org, kvm@vger.kernel.org
Cc: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, hpa@zytor.com, peterz@infradead.org,
	andrew.cooper3@citrix.com, seanjc@google.com,
	pbonzini@redhat.com, ravi.v.shankar@intel.com,
	jiangshanlai@gmail.com, shan.kang@intel.com
Subject: [PATCH v7 33/33] KVM: x86/vmx: refactor VMX_DO_EVENT_IRQOFF to generate FRED stack frames
Date: Tue,  4 Apr 2023 03:27:16 -0700	[thread overview]
Message-ID: <20230404102716.1795-34-xin3.li@intel.com> (raw)
In-Reply-To: <20230404102716.1795-1-xin3.li@intel.com>

Compared to an IDT stack frame, a FRED stack frame has an extra 16 bytes of
information pushed at the regular stack top and 8 bytes of error code _always_
pushed at the regular stack bottom. VMX_DO_EVENT_IRQOFF can therefore be
refactored to generate FRED stack frames with the event type and vector
properly set, so that IRQs/NMIs can be handled with the existing approach when
FRED is enabled.

As a FRED stack frame always contains an error code pushed by hardware, call
a trampoline function first to have the return instruction address pushed on
the regular stack. Then the trampoline function pushes an error code (0 for
both IRQ and NMI) and jumps to fred_entrypoint_kernel() for NMI handling or
calls external_interrupt() for IRQ handling.

The trampoline function for IRQ handling pushes the general purpose registers
to form a pt_regs structure and then uses it to call external_interrupt(). As a
result, IRQ handling does not execute any noinstr code.

Export fred_entrypoint_kernel() and external_interrupt() for the above changes.

Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
---

Changes since v6:
* Export fred_entrypoint_kernel(), required when kvm-intel built as a module.
* Reserve a REDZONE for CALL emulation and align RSP to a 64-byte boundary
  before pushing a new FRED stack frame.
---
 arch/x86/entry/entry_64_fred.S        |  1 +
 arch/x86/include/asm/asm-prototypes.h |  1 +
 arch/x86/include/asm/fred.h           |  1 +
 arch/x86/include/asm/traps.h          |  2 +
 arch/x86/kernel/traps.c               |  5 ++
 arch/x86/kvm/vmx/vmenter.S            | 74 ++++++++++++++++++++++++++-
 arch/x86/kvm/vmx/vmx.c                | 16 +++++-
 7 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
index efe2bcd11273..de74ab97ff00 100644
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -59,3 +59,4 @@ SYM_CODE_START_NOALIGN(fred_entrypoint_kernel)
 	FRED_EXIT
 	ERETS
 SYM_CODE_END(fred_entrypoint_kernel)
+EXPORT_SYMBOL(fred_entrypoint_kernel)
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index b1a98fa38828..076bf8dee702 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -12,6 +12,7 @@
 #include <asm/special_insns.h>
 #include <asm/preempt.h>
 #include <asm/asm.h>
+#include <asm/fred.h>
 #include <asm/gsseg.h>
 
 #ifndef CONFIG_X86_CMPXCHG64
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index f7caf3b2f3f7..d00b9cab6aa6 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -129,6 +129,7 @@ DECLARE_FRED_HANDLER(fred_exc_machine_check);
  * The actual assembly entry and exit points
  */
 extern __visible void fred_entrypoint_user(void);
+extern __visible void fred_entrypoint_kernel(void);
 
 /*
  * Initialization
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 612b3d6fec53..017b95624325 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -58,4 +58,6 @@ typedef DECLARE_SYSTEM_INTERRUPT_HANDLER((*system_interrupt_handler));
 
 system_interrupt_handler get_system_interrupt_handler(unsigned int i);
 
+int external_interrupt(struct pt_regs *regs);
+
 #endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 73471053ed02..0f1fcd53cb52 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -1573,6 +1573,11 @@ int external_interrupt(struct pt_regs *regs)
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+/* For KVM VMX to handle IRQs in IRQ induced VM exits. */
+EXPORT_SYMBOL_GPL(external_interrupt);
+#endif
+
 #endif /* CONFIG_X86_64 */
 
 void __init install_system_interrupt_handler(unsigned int n, const void *asm_addr, const void *addr)
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 631fd7da2bc3..f64b05b3d775 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -2,12 +2,14 @@
 #include <linux/linkage.h>
 #include <asm/asm.h>
 #include <asm/bitsperlong.h>
+#include <asm/fred.h>
 #include <asm/kvm_vcpu_regs.h>
 #include <asm/nospec-branch.h>
 #include <asm/percpu.h>
 #include <asm/segment.h>
 #include "kvm-asm-offsets.h"
 #include "run_flags.h"
+#include "../../entry/calling.h"
 
 #define WORD_SIZE (BITS_PER_LONG / 8)
 
@@ -31,7 +33,7 @@
 #define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
 #endif
 
-.macro VMX_DO_EVENT_IRQOFF call_insn call_target
+.macro VMX_DO_EVENT_IRQOFF call_insn call_target fred=0 nmi=0
 	/*
 	 * Unconditionally create a stack frame, getting the correct RSP on the
 	 * stack (for x86-64) would take two instructions anyways, and RBP can
@@ -41,16 +43,56 @@
 	mov %_ASM_SP, %_ASM_BP
 
 #ifdef CONFIG_X86_64
+	.if \fred
+#ifdef CONFIG_X86_FRED
+	/*
+	 * It's not necessary to change current stack level for handling IRQ/NMI
+	 * because the state of the kernel stack is well defined in this place
+	 * in the code, and it is known not to be deep in a bunch of nested I/O
+	 * layer handlers that eat up the stack.
+	 */
+
+	/* Reserve a REDZONE for CALL emulation. */
+	sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
+
+	/* Align RSP to a 64-byte boundary before pushing a new stack frame */
+	and $FRED_STACK_FRAME_RSP_MASK, %rsp
+
+	push $0		/* Reserved by FRED, must be 0 */
+	push $0		/* FRED event data, 0 for NMI and external interrupts */
+#endif
+	.else
 	/*
 	 * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
 	 * creating the synthetic interrupt stack frame for the IRQ/NMI.
 	 */
 	and  $-16, %rsp
+	.endif
+
+	.if \fred
+	.if \nmi
+	mov $(2 << 32 | 2 << 48), %_ASM_AX	/* NMI event type and vector */
+	.else
+	mov %_ASM_ARG1, %_ASM_AX
+	shl $32, %_ASM_AX			/* external interrupt vector */
+	.endif
+	add $__KERNEL_DS, %_ASM_AX
+	bts $57, %_ASM_AX			/* bit 57: 64-bit mode */
+	push %_ASM_AX
+	.else
 	push $__KERNEL_DS
+	.endif
+
 	push %rbp
 #endif
 	pushf
+	.if \nmi
+	mov $__KERNEL_CS, %_ASM_AX
+	bts $28, %_ASM_AX			/* set the NMI bit */
+	push %_ASM_AX
+	.else
 	push $__KERNEL_CS
+	.endif
 	\call_insn \call_target
 
 	/*
@@ -300,9 +342,19 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 SYM_FUNC_END(__vmx_vcpu_run)
 
 SYM_FUNC_START(vmx_do_nmi_irqoff)
-	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx nmi=1
 SYM_FUNC_END(vmx_do_nmi_irqoff)
 
+#ifdef CONFIG_X86_FRED
+SYM_FUNC_START(vmx_do_fred_nmi_trampoline)
+	push $0		/* FRED error code, 0 for NMI */
+	jmp fred_entrypoint_kernel
+SYM_FUNC_END(vmx_do_fred_nmi_trampoline)
+
+SYM_FUNC_START(vmx_do_fred_nmi_irqoff)
+	VMX_DO_EVENT_IRQOFF call vmx_do_fred_nmi_trampoline fred=1 nmi=1
+SYM_FUNC_END(vmx_do_fred_nmi_irqoff)
+#endif
 
 .section .text, "ax"
 
@@ -360,3 +412,21 @@ SYM_FUNC_END(vmread_error_trampoline)
 SYM_FUNC_START(vmx_do_interrupt_irqoff)
 	VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
 SYM_FUNC_END(vmx_do_interrupt_irqoff)
+
+#ifdef CONFIG_X86_FRED
+SYM_FUNC_START(vmx_do_fred_interrupt_trampoline)
+	push $0	/* FRED error code, 0 for NMI and external interrupts */
+	PUSH_REGS
+
+	movq	%rsp, %rdi	/* %rdi -> pt_regs */
+	call external_interrupt
+
+	POP_REGS
+	addq $8,%rsp		/* Drop FRED error code */
+	RET
+SYM_FUNC_END(vmx_do_fred_interrupt_trampoline)
+
+SYM_FUNC_START(vmx_do_fred_interrupt_irqoff)
+	VMX_DO_EVENT_IRQOFF call vmx_do_fred_interrupt_trampoline fred=1
+SYM_FUNC_END(vmx_do_fred_interrupt_irqoff)
+#endif
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d2d6e1b6c788..6dfe692dfd6a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6875,7 +6875,9 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 }
 
 void vmx_do_interrupt_irqoff(unsigned long entry);
+void vmx_do_fred_interrupt_irqoff(unsigned int vector);
 void vmx_do_nmi_irqoff(void);
+void vmx_do_fred_nmi_irqoff(void);
 
 static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
 {
@@ -6923,7 +6925,12 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 		return;
 
 	kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
-	vmx_do_interrupt_irqoff(gate_offset(desc));
+#ifdef CONFIG_X86_64
+	if (cpu_feature_enabled(X86_FEATURE_FRED))
+		vmx_do_fred_interrupt_irqoff(vector);
+	else
+#endif
+		vmx_do_interrupt_irqoff(gate_offset(desc));
 	kvm_after_interrupt(vcpu);
 
 	vcpu->arch.at_instruction_boundary = true;
@@ -7209,7 +7216,12 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 	if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
 	    is_nmi(vmx_get_intr_info(vcpu))) {
 		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
-		vmx_do_nmi_irqoff();
+#ifdef CONFIG_X86_64
+		if (cpu_feature_enabled(X86_FEATURE_FRED))
+			vmx_do_fred_nmi_irqoff();
+		else
+#endif
+			vmx_do_nmi_irqoff();
 		kvm_after_interrupt(vcpu);
 	}
 
-- 
2.34.1


      parent reply	other threads:[~2023-04-04 10:56 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-04 10:26 [PATCH v7 00/33] x86: enable FRED for x86-64 Xin Li
2023-04-04 10:26 ` [PATCH v7 01/33] x86/traps: let common_interrupt() handle IRQ_MOVE_CLEANUP_VECTOR Xin Li
2023-04-04 10:26 ` [PATCH v7 02/33] x86/fred: make unions for the cs and ss fields in struct pt_regs Xin Li
2023-04-04 10:26 ` [PATCH v7 03/33] x86/traps: add a system interrupt table for system interrupt dispatch Xin Li
2023-04-04 10:26 ` [PATCH v7 04/33] x86/traps: add install_system_interrupt_handler() Xin Li
2023-04-04 10:26 ` [PATCH v7 05/33] x86/traps: add external_interrupt() to dispatch external interrupts Xin Li
2023-04-04 10:26 ` [PATCH v7 06/33] x86/cpufeature: add the cpu feature bit for FRED Xin Li
2023-04-04 10:26 ` [PATCH v7 07/33] x86/opcode: add ERETU, ERETS instructions to x86-opcode-map Xin Li
2023-04-04 10:26 ` [PATCH v7 08/33] x86/objtool: teach objtool about ERETU and ERETS Xin Li
2023-04-04 10:26 ` [PATCH v7 09/33] x86/cpu: add X86_CR4_FRED macro Xin Li
2023-04-04 10:26 ` [PATCH v7 10/33] x86/fred: add Kconfig option for FRED (CONFIG_X86_FRED) Xin Li
2023-04-04 10:26 ` [PATCH v7 11/33] x86/fred: if CONFIG_X86_FRED is disabled, disable FRED support Xin Li
2023-04-04 10:26 ` [PATCH v7 12/33] x86/cpu: add MSR numbers for FRED configuration Xin Li
2023-04-04 10:26 ` [PATCH v7 13/33] x86/fred: header file for event types Xin Li
2023-04-04 10:26 ` [PATCH v7 14/33] x86/fred: header file with FRED definitions Xin Li
2023-04-04 10:26 ` [PATCH v7 15/33] x86/fred: reserve space for the FRED stack frame Xin Li
2023-04-04 10:26 ` [PATCH v7 16/33] x86/fred: add a page fault entry stub for FRED Xin Li
2023-04-04 10:27 ` [PATCH v7 17/33] x86/fred: add a debug " Xin Li
2023-04-04 10:27 ` [PATCH v7 18/33] x86/fred: add a NMI " Xin Li
2023-04-04 10:27 ` [PATCH v7 19/33] x86/fred: add a machine check " Xin Li
2023-04-04 10:27 ` [PATCH v7 20/33] x86/fred: FRED entry/exit and dispatch code Xin Li
2023-04-04 10:27 ` [PATCH v7 21/33] x86/fred: FRED initialization code Xin Li
2023-04-04 10:27 ` [PATCH v7 22/33] x86/fred: update MSR_IA32_FRED_RSP0 during task switch Xin Li
2023-04-04 10:27 ` [PATCH v7 23/33] x86/fred: let ret_from_fork() jmp to fred_exit_user when FRED is enabled Xin Li
2023-04-10 18:16   ` Dave Hansen
2023-04-10 18:31     ` Li, Xin3
2023-04-10 19:25       ` Li, Xin3
2023-04-04 10:27 ` [PATCH v7 24/33] x86/fred: disallow the swapgs instruction " Xin Li
2023-04-04 10:27 ` [PATCH v7 25/33] x86/fred: no ESPFIX needed " Xin Li
2023-04-04 10:27 ` [PATCH v7 26/33] x86/fred: allow single-step trap and NMI when starting a new thread Xin Li
2023-04-04 10:27 ` [PATCH v7 27/33] x86/fred: fixup fault on ERETU by jumping to fred_entrypoint_user Xin Li
2023-04-04 10:27 ` [PATCH v7 28/33] x86/ia32: do not modify the DPL bits for a null selector Xin Li
2023-04-04 10:27 ` [PATCH v7 29/33] x86/fred: allow FRED systems to use interrupt vectors 0x10-0x1f Xin Li
2023-04-04 10:27 ` [PATCH v7 30/33] x86/fred: allow dynamic stack frame size Xin Li
2023-04-04 10:27 ` [PATCH v7 31/33] x86/fred: BUG() when ERETU with %rsp not equal to that when the ring 3 event was just delivered Xin Li
2023-04-04 10:27 ` [PATCH v7 32/33] x86/fred: disable FRED by default in its early stage Xin Li
2023-04-04 10:27 ` Xin Li [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230404102716.1795-34-xin3.li@intel.com \
    --to=xin3.li@intel.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=jiangshanlai@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=seanjc@google.com \
    --cc=shan.kang@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox