All of lore.kernel.org
 help / color / mirror / Atom feed
From: Sean Christopherson <seanjc@google.com>
To: Xin Li <xin3.li@intel.com>
Cc: linux-kernel@vger.kernel.org, x86@kernel.org,
	kvm@vger.kernel.org, tglx@linutronix.de, mingo@redhat.com,
	bp@alien8.de, dave.hansen@linux.intel.com, hpa@zytor.com,
	peterz@infradead.org, andrew.cooper3@citrix.com,
	pbonzini@redhat.com, ravi.v.shankar@intel.com,
	jiangshanlai@gmail.com, shan.kang@intel.com
Subject: Re: [PATCH v8 33/33] KVM: x86/vmx: refactor VMX_DO_EVENT_IRQOFF to generate FRED stack frames
Date: Mon, 10 Apr 2023 14:50:06 -0700	[thread overview]
Message-ID: <ZDSEjhGV9D90J6Bx@google.com> (raw)
In-Reply-To: <20230410081438.1750-34-xin3.li@intel.com>

"KVM: VMX:" for the shortlog please.

On Mon, Apr 10, 2023, Xin Li wrote:
> -.macro VMX_DO_EVENT_IRQOFF call_insn call_target
> +.macro VMX_DO_EVENT_IRQOFF call_insn call_target fred=0 nmi=0
>  	/*
>  	 * Unconditionally create a stack frame, getting the correct RSP on the
>  	 * stack (for x86-64) would take two instructions anyways, and RBP can
> @@ -41,16 +43,55 @@
>  	mov %_ASM_SP, %_ASM_BP
>  
>  #ifdef CONFIG_X86_64
> +#ifdef CONFIG_X86_FRED
> +	/*
> +	 * It's not necessary to change current stack level for handling IRQ/NMI
> +	 * because the state of the kernel stack is well defined in this place
> +	 * in the code, and it is known not to be deep in a bunch of nested I/O
> +	 * layer handlers that eat up the stack.
> +	 *
> +	 * Before starting to push a FRED stack frame, FRED reserves a redzone
> +	 * (for CALL emulation) and aligns RSP to a 64-byte boundary.
> +	 */
> +	sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
> +	and $FRED_STACK_FRAME_RSP_MASK, %rsp
> +
> +	/*
> +	 * A FRED stack frame has extra 16 bytes of information pushed at the
> +	 * regular stack top comparing to an IDT stack frame.
> +	 */
> +	push $0		/* Reserved by FRED, must be 0 */
> +	push $0		/* FRED event data, 0 for NMI and external interrupts */
> +#else
>  	/*
>  	 * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
>  	 * creating the synthetic interrupt stack frame for the IRQ/NMI.
>  	 */
>  	and  $-16, %rsp
> -	push $__KERNEL_DS
> +#endif
> +
> +	.if \fred
> +	.if \nmi
> +	mov $(2 << 32 | 2 << 48), %rax		/* NMI event type and vector */
> +	.else
> +	mov %rdi, %rax
> +	shl $32, %rax				/* External interrupt vector */
> +	.endif
> +	add $__KERNEL_DS, %rax
> +	bts $57, %rax				/* Set 64-bit mode */
> +	.else
> +	mov $__KERNEL_DS, %rax
> +	.endif
> +	push %rax

This is painfully difficult to read, and the trampolines only add to that pain.
Using macros instead of magic numbers would alleviate a small amount of pain, but
the #ifdefs and .if \fred/\nmi are the real culprits.

>  static void handle_nm_fault_irqoff(struct kvm_vcpu *vcpu)
> @@ -6916,14 +6916,20 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
>  {
>  	u32 intr_info = vmx_get_intr_info(vcpu);
>  	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
> -	gate_desc *desc = (gate_desc *)host_idt_base + vector;
> +	unsigned long entry_or_vector;
> +
> +#ifdef CONFIG_X86_64
> +	entry_or_vector = vector;
> +#else
> +	entry_or_vector = gate_offset((gate_desc *)host_idt_base + vector);
> +#endif

And then this is equally gross.  Rather than funnel FRED+legacy into a single
function only to split them back out, just route FRED into its own asm subroutine.
The common bits are basically the creation/destruction of the stack frame and the
CALL itself, i.e. the truly interesting bits are what's different.

Pretty much all of the #ifdeffery goes away, the helpers just need #ifdefs to
play nice with CONFIG_X86_FRED=n.  E.g. something like the below as a starting
point (it most definitely doesn't compile, and most definitely isn't 100% correct).

---
 arch/x86/kvm/vmx/vmenter.S | 72 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c     | 19 ++++++++--
 2 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 631fd7da2bc3..a6929c78e038 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -2,12 +2,14 @@
 #include <linux/linkage.h>
 #include <asm/asm.h>
 #include <asm/bitsperlong.h>
+#include <asm/fred.h>
 #include <asm/kvm_vcpu_regs.h>
 #include <asm/nospec-branch.h>
 #include <asm/percpu.h>
 #include <asm/segment.h>
 #include "kvm-asm-offsets.h"
 #include "run_flags.h"
+#include "../../entry/calling.h"
 
 #define WORD_SIZE (BITS_PER_LONG / 8)
 
@@ -31,6 +33,62 @@
 #define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
 #endif
 
+#ifdef CONFIG_X86_FRED
+.macro VMX_DO_FRED_EVENT_IRQOFF call_target cs_val
+	/*
+	 * Unconditionally create a stack frame, getting the correct RSP on the
+	 * stack (for x86-64) would take two instructions anyways, and RBP can
+	 * be used to restore RSP to make objtool happy (see below).
+	 */
+	push %_ASM_BP
+	mov %_ASM_SP, %_ASM_BP
+
+	/*
+	 * Don't check the FRED stack level, the call stack leading to this
+	 * helper is effectively constant and shallow (relatively speaking).
+	 *
+	 * Emulate the FRED-defined redzone and stack alignment (128 bytes and
+	 * 64 bytes respectively).
+	 */
+	sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
+	and $FRED_STACK_FRAME_RSP_MASK, %rsp
+
+	/*
+	* A FRED stack frame has extra 16 bytes of information pushed at the
+	* regular stack top compared to an IDT stack frame.
+	*/
+	push $0         /* Reserved by FRED, must be 0 */
+	push $0         /* FRED event data, 0 for NMI and external interrupts */
+	shl $32, %rax
+	orq $__KERNEL_DS | $FRED_64_BIT_MODE, %ax
+	push %rax	/* Vector (from the "caller") and DS */
+
+	push %rbp
+	pushf
+	push \cs_val
+
+	push $0 /* FRED error code, 0 for NMI and external interrupts */
+	PUSH_REGS
+
+	/* Load @pt_regs */
+	movq    %rsp, %_ASM_ARG1
+
+	call \call_target
+
+	POP_REGS
+
+	/*
+	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+	 * the correct value.  objtool doesn't know the callee will IRET and,
+	 * without the explicit restore, thinks the stack is getting walloped.
+	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+	 */
+	mov %_ASM_BP, %_ASM_SP
+	pop %_ASM_BP
+	RET
+.endm
+#endif
+
 .macro VMX_DO_EVENT_IRQOFF call_insn call_target
 	/*
 	 * Unconditionally create a stack frame, getting the correct RSP on the
@@ -299,6 +357,14 @@ SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
 
 SYM_FUNC_END(__vmx_vcpu_run)
 
+#ifdef CONFIG_X86_FRED
+SYM_FUNC_START(vmx_do_fred_nmi_irqoff)
+	push $FRED_NMI_ERROR_CODE
+	mov $NMI_VECTOR | $FRED_NMI_SOMETHING, %eax
+	VMX_DO_FRED_EVENT_IRQOFF call fred_entrypoint_kernel $FRED_NMI_CS_VAL
+SYM_FUNC_END(vmx_do_nmi_irqoff)
+#endif
+
 SYM_FUNC_START(vmx_do_nmi_irqoff)
 	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
 SYM_FUNC_END(vmx_do_nmi_irqoff)
@@ -357,6 +423,12 @@ SYM_FUNC_START(vmread_error_trampoline)
 SYM_FUNC_END(vmread_error_trampoline)
 #endif
 
+#ifdef CONFIG_X86_FRED
+SYM_FUNC_START(vmx_do_fred_interrupt_irqoff)
+	mov %_ASM_ARG1, %rax
+	VMX_DO_FRED_EVENT_IRQOFF call external_interrupt
+#endif
+
 SYM_FUNC_START(vmx_do_interrupt_irqoff)
 	VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
 SYM_FUNC_END(vmx_do_interrupt_irqoff)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 11080a649f60..42f50b0cc125 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6891,6 +6891,14 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 	memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
 }
 
+#ifdef CONFIG_X86_FRED
+void vmx_do_fred_interrupt_irqoff(unsigned int vector);
+void vmx_do_fred_nmi_irqoff(unsigned int vector);
+#else
+#define vmx_do_fred_interrupt_irqoff(x) BUG();
+#define vmx_do_fred_nmi_irqoff(x) BUG();
+#endif
+
 void vmx_do_interrupt_irqoff(unsigned long entry);
 void vmx_do_nmi_irqoff(void);
 
@@ -6933,14 +6941,16 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
 {
 	u32 intr_info = vmx_get_intr_info(vcpu);
 	unsigned int vector = intr_info & INTR_INFO_VECTOR_MASK;
-	gate_desc *desc = (gate_desc *)host_idt_base + vector;
 
 	if (KVM_BUG(!is_external_intr(intr_info), vcpu->kvm,
 	    "unexpected VM-Exit interrupt info: 0x%x", intr_info))
 		return;
 
 	kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
-	vmx_do_interrupt_irqoff(gate_offset(desc));
+	if (cpu_feature_enabled(X86_FEATURE_FRED))
+		vmx_do_fred_interrupt_irqoff(vector);
+	else
+		vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
 	kvm_after_interrupt(vcpu);
 
 	vcpu->arch.at_instruction_boundary = true;
@@ -7226,7 +7236,10 @@ static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
 	if ((u16)vmx->exit_reason.basic == EXIT_REASON_EXCEPTION_NMI &&
 	    is_nmi(vmx_get_intr_info(vcpu))) {
 		kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
-		vmx_do_nmi_irqoff();
+		if (cpu_feature_enabled(X86_FEATURE_FRED))
+			vmx_do_fred_nmi_irqoff();
+		else
+			vmx_do_nmi_irqoff();
 		kvm_after_interrupt(vcpu);
 	}
 

base-commit: 33d1a64081c98e390e064db18738428d6fb96f95
-- 


  reply	other threads:[~2023-04-10 21:50 UTC|newest]

Thread overview: 96+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-10  8:14 [PATCH v8 00/33] x86: enable FRED for x86-64 Xin Li
2023-04-10  8:14 ` [PATCH v8 01/33] x86/traps: let common_interrupt() handle IRQ_MOVE_CLEANUP_VECTOR Xin Li
2023-05-07 11:59   ` Borislav Petkov
2023-06-03 19:19     ` Li, Xin3
2023-06-03 20:51   ` Thomas Gleixner
2023-06-05 17:07     ` Thomas Gleixner
2023-06-05 17:09       ` H. Peter Anvin
2023-06-06 20:09         ` Thomas Gleixner
2023-06-06 23:16           ` Li, Xin3
2023-06-19  8:00           ` Li, Xin3
2023-06-19 14:22             ` Thomas Gleixner
2023-06-19 18:47               ` Li, Xin3
2023-06-19 19:16                 ` H. Peter Anvin
2023-06-20  0:04                   ` Li, Xin3
2023-04-10  8:14 ` [PATCH v8 02/33] x86/fred: make unions for the cs and ss fields in struct pt_regs Xin Li
2023-06-03  9:48   ` Borislav Petkov
2023-06-05 12:07   ` Thomas Gleixner
2023-06-05 17:12     ` H. Peter Anvin
2023-06-05 17:29       ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 03/33] x86/traps: add a system interrupt table for system interrupt dispatch Xin Li
2023-06-05  8:34   ` Thomas Gleixner
2023-06-06  8:05     ` Li, Xin3
2023-06-05  8:38   ` Thomas Gleixner
2023-06-05  8:39   ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 04/33] x86/traps: add install_system_interrupt_handler() Xin Li
2023-06-05  8:57   ` Thomas Gleixner
2023-06-06  5:46     ` Li, Xin3
2023-04-10  8:14 ` [PATCH v8 05/33] x86/traps: add external_interrupt() to dispatch external interrupts Xin Li
2023-06-05 11:56   ` Thomas Gleixner
2023-06-05 17:52     ` Thomas Gleixner
2023-06-19 19:16     ` Li, Xin3
2023-06-19 21:13       ` Thomas Gleixner
2023-06-20  0:16         ` Li, Xin3
2023-04-10  8:14 ` [PATCH v8 06/33] x86/cpufeature: add the cpu feature bit for FRED Xin Li
2023-04-10  8:14 ` [PATCH v8 07/33] x86/opcode: add ERETU, ERETS instructions to x86-opcode-map Xin Li
2023-04-10  8:14 ` [PATCH v8 08/33] x86/objtool: teach objtool about ERETU and ERETS Xin Li
2023-04-10  8:14 ` [PATCH v8 09/33] x86/cpu: add X86_CR4_FRED macro Xin Li
2023-06-05 12:01   ` Thomas Gleixner
2023-06-05 17:06     ` H. Peter Anvin
2023-06-05 17:19     ` H. Peter Anvin
2023-04-10  8:14 ` [PATCH v8 10/33] x86/fred: add Kconfig option for FRED (CONFIG_X86_FRED) Xin Li
2023-04-10  8:14 ` [PATCH v8 11/33] x86/fred: if CONFIG_X86_FRED is disabled, disable FRED support Xin Li
2023-04-10  8:14 ` [PATCH v8 12/33] x86/cpu: add MSR numbers for FRED configuration Xin Li
2023-04-10  8:14 ` [PATCH v8 13/33] x86/fred: header file for event types Xin Li
2023-04-10  8:14 ` [PATCH v8 14/33] x86/fred: header file with FRED definitions Xin Li
2023-04-10  8:14 ` [PATCH v8 15/33] x86/fred: reserve space for the FRED stack frame Xin Li
2023-04-10  8:14 ` [PATCH v8 16/33] x86/fred: add a page fault entry stub for FRED Xin Li
2023-04-10  8:14 ` [PATCH v8 17/33] x86/fred: add a debug " Xin Li
2023-04-10  8:14 ` [PATCH v8 18/33] x86/fred: add a NMI " Xin Li
2023-04-10  8:14 ` [PATCH v8 19/33] x86/fred: add a machine check " Xin Li
2023-04-10  8:14 ` [PATCH v8 20/33] x86/fred: FRED entry/exit and dispatch code Xin Li
2023-06-05 13:21   ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 21/33] x86/fred: FRED initialization code Xin Li
2023-06-05 12:15   ` Thomas Gleixner
2023-06-05 13:41   ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 22/33] x86/fred: update MSR_IA32_FRED_RSP0 during task switch Xin Li
2023-04-10  8:14 ` [PATCH v8 23/33] x86/fred: let ret_from_fork() jmp to fred_exit_user when FRED is enabled Xin Li
2023-04-10  8:14 ` [PATCH v8 24/33] x86/fred: disallow the swapgs instruction " Xin Li
2023-06-05 13:47   ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 25/33] x86/fred: no ESPFIX needed " Xin Li
2023-04-10  8:14 ` [PATCH v8 26/33] x86/fred: allow single-step trap and NMI when starting a new thread Xin Li
2023-06-05 13:50   ` Thomas Gleixner
2023-06-05 13:52     ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 27/33] x86/fred: fixup fault on ERETU by jumping to fred_entrypoint_user Xin Li
2023-04-10  8:14 ` [PATCH v8 28/33] x86/ia32: do not modify the DPL bits for a null selector Xin Li
2023-04-10  8:14 ` [PATCH v8 29/33] x86/fred: allow FRED systems to use interrupt vectors 0x10-0x1f Xin Li
2023-06-05 14:06   ` Thomas Gleixner
2023-06-05 16:55     ` H. Peter Anvin
2023-04-10  8:14 ` [PATCH v8 30/33] x86/fred: allow dynamic stack frame size Xin Li
2023-06-05 14:11   ` Thomas Gleixner
2023-06-06  6:18     ` Li, Xin3
2023-06-06 13:27       ` Thomas Gleixner
2023-06-06 23:08         ` H. Peter Anvin
2023-04-10  8:14 ` [PATCH v8 31/33] x86/fred: BUG() when ERETU with %rsp not equal to that when the ring 3 event was just delivered Xin Li
2023-06-05 14:15   ` Thomas Gleixner
2023-06-05 16:42     ` H. Peter Anvin
2023-06-05 17:16       ` Thomas Gleixner
2023-04-10  8:14 ` [PATCH v8 32/33] x86/fred: disable FRED by default in its early stage Xin Li
2023-04-10  8:14 ` [PATCH v8 33/33] KVM: x86/vmx: refactor VMX_DO_EVENT_IRQOFF to generate FRED stack frames Xin Li
2023-04-10 21:50   ` Sean Christopherson [this message]
2023-04-11  5:06     ` Li, Xin3
2023-04-11 18:34       ` Sean Christopherson
2023-04-11 22:50         ` Li, Xin3
2023-04-12 18:26     ` Li, Xin3
2023-04-12 19:37       ` Sean Christopherson
2023-04-10 18:37 ` [PATCH v8 00/33] x86: enable FRED for x86-64 Dave Hansen
2023-04-10 19:14   ` Li, Xin3
2023-04-10 19:32     ` Borislav Petkov
2023-04-10 19:38       ` Dave Hansen
2023-04-10 20:52         ` Li, Xin3
2023-04-11  4:14       ` Li, Xin3
2023-04-10 18:49 ` Dave Hansen
2023-04-10 19:16   ` Li, Xin3
2023-06-05 17:11     ` Thomas Gleixner
2023-06-05 17:22 ` H. Peter Anvin
2023-06-05 17:32   ` Thomas Gleixner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZDSEjhGV9D90J6Bx@google.com \
    --to=seanjc@google.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=jiangshanlai@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=shan.kang@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    --cc=xin3.li@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.