public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Xin Li <xin3.li@intel.com>
To: linux-kernel@vger.kernel.org, x86@kernel.org, kvm@vger.kernel.org
Cc: tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, hpa@zytor.com, peterz@infradead.org,
	andrew.cooper3@citrix.com, seanjc@google.com,
	pbonzini@redhat.com, ravi.v.shankar@intel.com,
	jiangshanlai@gmail.com, shan.kang@intel.com
Subject: [PATCH v7 20/33] x86/fred: FRED entry/exit and dispatch code
Date: Tue,  4 Apr 2023 03:27:03 -0700	[thread overview]
Message-ID: <20230404102716.1795-21-xin3.li@intel.com> (raw)
In-Reply-To: <20230404102716.1795-1-xin3.li@intel.com>

From: "H. Peter Anvin (Intel)" <hpa@zytor.com>

The code to actually handle kernel and event entry/exit using
FRED. It is split up into two files thus:

- entry_64_fred.S contains the actual entrypoints and exit code, and
  saves and restores registers.
- entry_fred.c contains the two-level event dispatch code for FRED.
  The first-level dispatch is on the event type, and the second-level
  is on the event vector.

Originally-by: Megha Dey <megha.dey@intel.com>
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Co-developed-by: Xin Li <xin3.li@intel.com>
Tested-by: Shan Kang <shan.kang@intel.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
---

Changes since v1:
* Initialize a FRED exception handler to fred_bad_event() instead of NULL
  if no FRED handler defined for an exception vector (Peter Zijlstra).
* Push calling irqentry_{enter,exit}() and instrumentation_{begin,end}()
  down into individual FRED exception handlers, instead of in the dispatch
  framework (Peter Zijlstra).
---
 arch/x86/entry/Makefile         |   5 +-
 arch/x86/entry/entry_64_fred.S  |  57 +++++++++
 arch/x86/entry/entry_fred.c     | 220 ++++++++++++++++++++++++++++++++
 arch/x86/include/asm/idtentry.h |   8 ++
 4 files changed, 289 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/entry/entry_64_fred.S
 create mode 100644 arch/x86/entry/entry_fred.c

diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index ca2fe186994b..c93e7f5c2a06 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -18,6 +18,9 @@ obj-y				+= vdso/
 obj-y				+= vsyscall/
 
 obj-$(CONFIG_PREEMPTION)	+= thunk_$(BITS).o
+CFLAGS_entry_fred.o		+= -fno-stack-protector
+CFLAGS_REMOVE_entry_fred.o	+= -pg $(CC_FLAGS_FTRACE)
+obj-$(CONFIG_X86_FRED)		+= entry_64_fred.o entry_fred.o
+
 obj-$(CONFIG_IA32_EMULATION)	+= entry_64_compat.o syscall_32.o
 obj-$(CONFIG_X86_X32_ABI)	+= syscall_x32.o
-
diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
new file mode 100644
index 000000000000..d975cacd060f
--- /dev/null
+++ b/arch/x86/entry/entry_64_fred.S
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ *  arch/x86/entry/entry_64_fred.S
+ *
+ * The actual FRED entry points.
+ */
+#include <linux/linkage.h>
+#include <asm/errno.h>
+#include <asm/asm-offsets.h>
+#include <asm/fred.h>
+
+#include "calling.h"
+
+	.code64
+	.section ".noinstr.text", "ax"
+
+.macro FRED_ENTER
+	UNWIND_HINT_EMPTY
+	PUSH_AND_CLEAR_REGS
+	movq	%rsp, %rdi	/* %rdi -> pt_regs */
+.endm
+
+.macro FRED_EXIT
+	UNWIND_HINT_REGS
+	POP_REGS
+	addq $8,%rsp		/* Drop error code */
+.endm
+
+/*
+ * The new RIP value that FRED event delivery establishes is
+ * IA32_FRED_CONFIG & ~FFFH for events that occur in ring 3.
+ * Thus the FRED ring 3 entry point must be 4K page aligned.
+ */
+	.align 4096
+
+SYM_CODE_START_NOALIGN(fred_entrypoint_user)
+	FRED_ENTER
+	call	fred_entry_from_user
+SYM_INNER_LABEL(fred_exit_user, SYM_L_GLOBAL)
+	FRED_EXIT
+	ERETU
+SYM_CODE_END(fred_entrypoint_user)
+
+.fill fred_entrypoint_kernel - ., 1, 0xcc
+
+/*
+ * The new RIP value that FRED event delivery establishes is
+ * (IA32_FRED_CONFIG & ~FFFH) + 256 for events that occur in
+ * ring 0, i.e., fred_entrypoint_user + 256.
+ */
+	.org fred_entrypoint_user+256
+SYM_CODE_START_NOALIGN(fred_entrypoint_kernel)
+	FRED_ENTER
+	call	fred_entry_from_kernel
+	FRED_EXIT
+	ERETS
+SYM_CODE_END(fred_entrypoint_kernel)
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
new file mode 100644
index 000000000000..0bff1db913b8
--- /dev/null
+++ b/arch/x86/entry/entry_fred.c
@@ -0,0 +1,220 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arch/x86/entry/entry_fred.c
+ *
+ * This contains the dispatch functions called from the entry point
+ * assembly.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kdebug.h>		/* oops_begin/end, ...		*/
+#include <linux/nospec.h>
+#include <asm/event-type.h>
+#include <asm/fred.h>
+#include <asm/idtentry.h>
+#include <asm/syscall.h>
+#include <asm/trapnr.h>
+#include <asm/traps.h>
+#include <asm/kdebug.h>
+
+/*
+ * Badness...
+ */
+static DEFINE_FRED_HANDLER(fred_bad_event)
+{
+	irqentry_state_t irq_state = irqentry_nmi_enter(regs);
+
+	instrumentation_begin();
+
+	/* Panic on events from a high stack level */
+	if (regs->current_stack_level > 0) {
+		pr_emerg("PANIC: invalid or fatal FRED event; event type %u "
+			 "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
+			 regs->type, regs->vector, regs->orig_ax,
+			 fred_event_data(regs), regs->cs, regs->ip);
+		die("invalid or fatal FRED event", regs, regs->orig_ax);
+		panic("invalid or fatal FRED event");
+	} else {
+		unsigned long flags = oops_begin();
+		int sig = SIGKILL;
+
+		pr_alert("BUG: invalid or fatal FRED event; event type %u "
+			 "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
+			 regs->type, regs->vector, regs->orig_ax,
+			 fred_event_data(regs), regs->cs, regs->ip);
+
+		if (__die("Invalid or fatal FRED event", regs, regs->orig_ax))
+			sig = 0;
+
+		oops_end(flags, regs, sig);
+	}
+
+	instrumentation_end();
+	irqentry_nmi_exit(regs, irq_state);
+}
+
+noinstr void fred_exc_double_fault(struct pt_regs *regs)
+{
+	exc_double_fault(regs, regs->orig_ax);
+}
+
+/*
+ * Exception entry
+ */
+static DEFINE_FRED_HANDLER(fred_exception)
+{
+	/*
+	 * Exceptions that cannot happen on FRED h/w are set to fred_bad_event().
+	 */
+	static const fred_handler exception_handlers[NUM_EXCEPTION_VECTORS] = {
+		[X86_TRAP_DE] = exc_divide_error,
+		[X86_TRAP_DB] = fred_exc_debug,
+		[X86_TRAP_NMI] = fred_bad_event, /* A separate event type, not handled here */
+		[X86_TRAP_BP] = exc_int3,
+		[X86_TRAP_OF] = exc_overflow,
+		[X86_TRAP_BR] = exc_bounds,
+		[X86_TRAP_UD] = exc_invalid_op,
+		[X86_TRAP_NM] = exc_device_not_available,
+		[X86_TRAP_DF] = fred_exc_double_fault,
+		[X86_TRAP_OLD_MF] = fred_bad_event, /* 387 only! */
+		[X86_TRAP_TS] = fred_exc_invalid_tss,
+		[X86_TRAP_NP] = fred_exc_segment_not_present,
+		[X86_TRAP_SS] = fred_exc_stack_segment,
+		[X86_TRAP_GP] = fred_exc_general_protection,
+		[X86_TRAP_PF] = fred_exc_page_fault,
+		[X86_TRAP_SPURIOUS] = fred_bad_event, /* Interrupts are their own event type */
+		[X86_TRAP_MF] = exc_coprocessor_error,
+		[X86_TRAP_AC] = fred_exc_alignment_check,
+		[X86_TRAP_MC] = fred_exc_machine_check,
+		[X86_TRAP_XF] = exc_simd_coprocessor_error,
+		[X86_TRAP_VE...NUM_EXCEPTION_VECTORS-1] = fred_bad_event
+	};
+	u8 vector = array_index_nospec((u8)regs->vector, NUM_EXCEPTION_VECTORS);
+
+	exception_handlers[vector](regs);
+}
+
+static __always_inline void fred_emulate_trap(struct pt_regs *regs)
+{
+	regs->type = EVENT_TYPE_SWFAULT;
+	regs->orig_ax = 0;
+	fred_exception(regs);
+}
+
+static __always_inline void fred_emulate_fault(struct pt_regs *regs)
+{
+	regs->ip -= regs->instr_len;
+	fred_emulate_trap(regs);
+}
+
+/*
+ * Emulate SYSENTER if applicable. This is not the preferred system
+ * call in 32-bit mode under FRED, rather int $0x80 is preferred and
+ * exported in the vdso. SYSCALL proper has a hard-coded early out in
+ * fred_entry_from_user().
+ */
+static DEFINE_FRED_HANDLER(fred_syscall_slow)
+{
+	if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
+	    likely(regs->vector == FRED_SYSENTER)) {
+		/* Convert frame to a syscall frame */
+		regs->orig_ax = regs->ax;
+		regs->ax = -ENOSYS;
+		do_fast_syscall_32(regs);
+	} else {
+		regs->vector = X86_TRAP_UD;
+		fred_emulate_fault(regs);
+	}
+}
+
+/*
+ * Some software exceptions can also be triggered as int instructions,
+ * for historical reasons. Implement those here. The performance-critical
+ * int $0x80 (32-bit system call) has a hard-coded early out.
+ */
+static DEFINE_FRED_HANDLER(fred_sw_interrupt_user)
+{
+	if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
+	    likely(regs->vector == IA32_SYSCALL_VECTOR)) {
+		/* Convert frame to a syscall frame */
+		regs->orig_ax = regs->ax;
+		regs->ax = -ENOSYS;
+		return do_int80_syscall_32(regs);
+	}
+
+	switch (regs->vector) {
+	case X86_TRAP_BP:
+	case X86_TRAP_OF:
+		fred_emulate_trap(regs);
+		break;
+	default:
+		regs->vector = X86_TRAP_GP;
+		fred_emulate_fault(regs);
+		break;
+	}
+}
+
+static DEFINE_FRED_HANDLER(fred_hw_interrupt)
+{
+	irqentry_state_t state = irqentry_enter(regs);
+
+	instrumentation_begin();
+	external_interrupt(regs);
+	instrumentation_end();
+	irqentry_exit(regs, state);
+}
+
+__visible noinstr void fred_entry_from_user(struct pt_regs *regs)
+{
+	static const fred_handler user_handlers[FRED_EVENT_TYPE_COUNT] =
+	{
+		[EVENT_TYPE_HWINT]	= fred_hw_interrupt,
+		[EVENT_TYPE_RESERVED]	= fred_bad_event,
+		[EVENT_TYPE_NMI]	= fred_exc_nmi,
+		[EVENT_TYPE_SWINT]	= fred_sw_interrupt_user,
+		[EVENT_TYPE_HWFAULT]	= fred_exception,
+		[EVENT_TYPE_SWFAULT]	= fred_exception,
+		[EVENT_TYPE_PRIVSW]	= fred_exception,
+		[EVENT_TYPE_OTHER]	= fred_syscall_slow
+	};
+
+	/*
+	 * FRED employs a two-level event dispatch mechanism, with
+	 * the first-level on the type of an event and the second-level
+	 * on its vector. Thus a dispatch typically induces 2 calls.
+	 * We optimize it by using early outs for the most frequent
+	 * events, and syscalls are the first. We may also need early
+	 * outs for page faults.
+	 */
+	if (likely(regs->type == EVENT_TYPE_OTHER &&
+		   regs->vector == FRED_SYSCALL)) {
+		/* Convert frame to a syscall frame */
+		regs->orig_ax = regs->ax;
+		regs->ax = -ENOSYS;
+		do_syscall_64(regs, regs->orig_ax);
+	} else {
+		/* Not a system call */
+		u8 type = array_index_nospec((u8)regs->type, FRED_EVENT_TYPE_COUNT);
+
+		user_handlers[type](regs);
+	}
+}
+
+__visible noinstr void fred_entry_from_kernel(struct pt_regs *regs)
+{
+	static const fred_handler kernel_handlers[FRED_EVENT_TYPE_COUNT] =
+	{
+		[EVENT_TYPE_HWINT]	= fred_hw_interrupt,
+		[EVENT_TYPE_RESERVED]	= fred_bad_event,
+		[EVENT_TYPE_NMI]	= fred_exc_nmi,
+		[EVENT_TYPE_SWINT]	= fred_bad_event,
+		[EVENT_TYPE_HWFAULT]	= fred_exception,
+		[EVENT_TYPE_SWFAULT]	= fred_exception,
+		[EVENT_TYPE_PRIVSW]	= fred_exception,
+		[EVENT_TYPE_OTHER]	= fred_bad_event
+	};
+	u8 type = array_index_nospec((u8)regs->type, FRED_EVENT_TYPE_COUNT);
+
+	/* The pt_regs frame on entry here is an exception frame */
+	kernel_handlers[type](regs);
+}
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 2876ddae02bc..bd43866f9c3e 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -82,6 +82,7 @@ static __always_inline void __##func(struct pt_regs *regs)
 #define DECLARE_IDTENTRY_ERRORCODE(vector, func)			\
 	asmlinkage void asm_##func(void);				\
 	asmlinkage void xen_asm_##func(void);				\
+	__visible void fred_##func(struct pt_regs *regs);		\
 	__visible void func(struct pt_regs *regs, unsigned long error_code)
 
 /**
@@ -106,6 +107,11 @@ __visible noinstr void func(struct pt_regs *regs,			\
 	irqentry_exit(regs, state);					\
 }									\
 									\
+__visible noinstr void fred_##func(struct pt_regs *regs)		\
+{									\
+	func (regs, regs->orig_ax);					\
+}									\
+									\
 static __always_inline void __##func(struct pt_regs *regs,		\
 				     unsigned long error_code)
 
@@ -622,6 +628,8 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_MC,	exc_machine_check);
 #ifdef CONFIG_XEN_PV
 DECLARE_IDTENTRY_RAW(X86_TRAP_MC,	xenpv_exc_machine_check);
 #endif
+#else
+#define fred_exc_machine_check		fred_bad_event
 #endif
 
 /* NMI */
-- 
2.34.1


  parent reply	other threads:[~2023-04-04 10:55 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-04 10:26 [PATCH v7 00/33] x86: enable FRED for x86-64 Xin Li
2023-04-04 10:26 ` [PATCH v7 01/33] x86/traps: let common_interrupt() handle IRQ_MOVE_CLEANUP_VECTOR Xin Li
2023-04-04 10:26 ` [PATCH v7 02/33] x86/fred: make unions for the cs and ss fields in struct pt_regs Xin Li
2023-04-04 10:26 ` [PATCH v7 03/33] x86/traps: add a system interrupt table for system interrupt dispatch Xin Li
2023-04-04 10:26 ` [PATCH v7 04/33] x86/traps: add install_system_interrupt_handler() Xin Li
2023-04-04 10:26 ` [PATCH v7 05/33] x86/traps: add external_interrupt() to dispatch external interrupts Xin Li
2023-04-04 10:26 ` [PATCH v7 06/33] x86/cpufeature: add the cpu feature bit for FRED Xin Li
2023-04-04 10:26 ` [PATCH v7 07/33] x86/opcode: add ERETU, ERETS instructions to x86-opcode-map Xin Li
2023-04-04 10:26 ` [PATCH v7 08/33] x86/objtool: teach objtool about ERETU and ERETS Xin Li
2023-04-04 10:26 ` [PATCH v7 09/33] x86/cpu: add X86_CR4_FRED macro Xin Li
2023-04-04 10:26 ` [PATCH v7 10/33] x86/fred: add Kconfig option for FRED (CONFIG_X86_FRED) Xin Li
2023-04-04 10:26 ` [PATCH v7 11/33] x86/fred: if CONFIG_X86_FRED is disabled, disable FRED support Xin Li
2023-04-04 10:26 ` [PATCH v7 12/33] x86/cpu: add MSR numbers for FRED configuration Xin Li
2023-04-04 10:26 ` [PATCH v7 13/33] x86/fred: header file for event types Xin Li
2023-04-04 10:26 ` [PATCH v7 14/33] x86/fred: header file with FRED definitions Xin Li
2023-04-04 10:26 ` [PATCH v7 15/33] x86/fred: reserve space for the FRED stack frame Xin Li
2023-04-04 10:26 ` [PATCH v7 16/33] x86/fred: add a page fault entry stub for FRED Xin Li
2023-04-04 10:27 ` [PATCH v7 17/33] x86/fred: add a debug " Xin Li
2023-04-04 10:27 ` [PATCH v7 18/33] x86/fred: add a NMI " Xin Li
2023-04-04 10:27 ` [PATCH v7 19/33] x86/fred: add a machine check " Xin Li
2023-04-04 10:27 ` Xin Li [this message]
2023-04-04 10:27 ` [PATCH v7 21/33] x86/fred: FRED initialization code Xin Li
2023-04-04 10:27 ` [PATCH v7 22/33] x86/fred: update MSR_IA32_FRED_RSP0 during task switch Xin Li
2023-04-04 10:27 ` [PATCH v7 23/33] x86/fred: let ret_from_fork() jmp to fred_exit_user when FRED is enabled Xin Li
2023-04-10 18:16   ` Dave Hansen
2023-04-10 18:31     ` Li, Xin3
2023-04-10 19:25       ` Li, Xin3
2023-04-04 10:27 ` [PATCH v7 24/33] x86/fred: disallow the swapgs instruction " Xin Li
2023-04-04 10:27 ` [PATCH v7 25/33] x86/fred: no ESPFIX needed " Xin Li
2023-04-04 10:27 ` [PATCH v7 26/33] x86/fred: allow single-step trap and NMI when starting a new thread Xin Li
2023-04-04 10:27 ` [PATCH v7 27/33] x86/fred: fixup fault on ERETU by jumping to fred_entrypoint_user Xin Li
2023-04-04 10:27 ` [PATCH v7 28/33] x86/ia32: do not modify the DPL bits for a null selector Xin Li
2023-04-04 10:27 ` [PATCH v7 29/33] x86/fred: allow FRED systems to use interrupt vectors 0x10-0x1f Xin Li
2023-04-04 10:27 ` [PATCH v7 30/33] x86/fred: allow dynamic stack frame size Xin Li
2023-04-04 10:27 ` [PATCH v7 31/33] x86/fred: BUG() when ERETU with %rsp not equal to that when the ring 3 event was just delivered Xin Li
2023-04-04 10:27 ` [PATCH v7 32/33] x86/fred: disable FRED by default in its early stage Xin Li
2023-04-04 10:27 ` [PATCH v7 33/33] KVM: x86/vmx: refactor VMX_DO_EVENT_IRQOFF to generate FRED stack frames Xin Li

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230404102716.1795-21-xin3.li@intel.com \
    --to=xin3.li@intel.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=hpa@zytor.com \
    --cc=jiangshanlai@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=ravi.v.shankar@intel.com \
    --cc=seanjc@google.com \
    --cc=shan.kang@intel.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox