From: Peter Zijlstra <peterz@infradead.org>
To: Sean Christopherson <seanjc@google.com>
Cc: Thomas Gleixner <tglx@kernel.org>,
Jim Mattson <jmattson@google.com>,
Binbin Wu <binbin.wu@linux.intel.com>,
Vishal L Verma <vishal.l.verma@intel.com>,
"kvm@vger.kernel.org" <kvm@vger.kernel.org>,
Rick P Edgecombe <rick.p.edgecombe@intel.com>,
Binbin Wu <binbin.wu@intel.com>,
"x86@kernel.org" <x86@kernel.org>,
Paolo Bonzini <bonzini@redhat.com>
Subject: Re: CPU Lockups in KVM with deferred hrtimer rearming
Date: Tue, 21 Apr 2026 22:06:20 +0200 [thread overview]
Message-ID: <20260421200620.GK3126523@noisy.programming.kicks-ass.net> (raw)
In-Reply-To: <aefIJR_FcEeP-fcS@google.com>
On Tue, Apr 21, 2026 at 11:55:33AM -0700, Sean Christopherson wrote:
> Pulling in an earlier idea:
>
> : Now for VMX, that hrtimer_rearm_deferred() call should really go into
> : handle_external_interrupt_irqoff(), which in turn requires to export
> : __hrtimer_rearm_deferred().
>
> Actually, even better would be to bury the FRED vs. not-FRED details in entry
> code. E.g. on the KVM invocation side, we could get to something like the below,
> and I'm pretty sure _reduce_ the number of for-KVM exports in the process.
Something like so then?
diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile
index 72cae8e0ce85..83b4762d6ecb 100644
--- a/arch/x86/entry/Makefile
+++ b/arch/x86/entry/Makefile
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_syscall_64.o = $(CC_FLAGS_FTRACE)
CFLAGS_syscall_32.o += -fno-stack-protector
CFLAGS_syscall_64.o += -fno-stack-protector
-obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o
+obj-y := entry.o entry_$(BITS).o syscall_$(BITS).o common.o
obj-y += vdso/
obj-y += vsyscall/
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
new file mode 100644
index 000000000000..4b0171abb083
--- /dev/null
+++ b/arch/x86/entry/common.c
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/kvm_types.h>
+#include <linux/hrtimer_rearm.h>
+#include <asm/entry-common.h>
+#include <asm/fred.h>
+#include <asm/desc.h>
+
+noinstr void x86_entry_from_kvm(unsigned int event_type, unsigned int vector)
+{
+#ifdef CONFIG_X86_64
+ fred_entry_from_kvm(event_type, vector);
+#else
+ idt_entry_from_kvm(vector);
+#endif
+ if (event_type == EVENT_TYPE_EXTINT) {
+ instrumentation_begin();
+ hrtimer_rearm_deferred();
+ instrumentation_end();
+ }
+}
+EXPORT_SYMBOL_FOR_KVM(x86_entry_from_kvm);
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 92c0b4a94e0a..96c3e9322297 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1224,3 +1224,36 @@ SYM_CODE_START(rewind_stack_and_make_dead)
1: jmp 1b
SYM_CODE_END(rewind_stack_and_make_dead)
.popsection
+
+.pushsection .noinstr.text, "ax"
+.macro IDT_DO_EVENT_IRQOFF call_insn call_target
+ /*
+ * Unconditionally create a stack frame, getting the correct RSP on the
+ * stack (for x86-64) would take two instructions anyways, and RBP can
+ * be used to restore RSP to make objtool happy (see below).
+ */
+ push %ebp
+ mov %esp, %ebp
+
+ pushf
+ push $__KERNEL_CS
+ \call_insn \call_target
+
+ /*
+ * "Restore" RSP from RBP, even though IRET has already unwound RSP to
+ * the correct value. objtool doesn't know the callee will IRET and,
+ * without the explicit restore, thinks the stack is getting walloped.
+ * Using an unwind hint is problematic due to x86-64's dynamic alignment.
+ */
+ leave
+ RET
+.endm
+
+SYM_FUNC_START(idt_do_interrupt_irqoff)
+ IDT_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
+SYM_FUNC_END(idt_do_interrupt_irqoff)
+
+SYM_FUNC_START(idt_do_nmi_irqoff)
+ IDT_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
+SYM_FUNC_END(idt_do_nmi_irqoff)
+.popsection
diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
index 894f7f16eb80..0d2768ab836c 100644
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -147,5 +147,4 @@ SYM_FUNC_START(asm_fred_entry_from_kvm)
RET
SYM_FUNC_END(asm_fred_entry_from_kvm)
-EXPORT_SYMBOL_FOR_KVM(asm_fred_entry_from_kvm);
#endif
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index ec95fe44fa3a..cb24990f38fd 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -437,6 +437,7 @@ extern void idt_setup_early_traps(void);
extern void idt_setup_traps(void);
extern void idt_setup_apic_and_irq_gates(void);
extern bool idt_is_f00f_address(unsigned long address);
+extern void idt_entry_from_kvm(unsigned int vector);
#ifdef CONFIG_X86_64
extern void idt_setup_early_pf(void);
diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h
index 7535131c711b..eca24b5e07f4 100644
--- a/arch/x86/include/asm/entry-common.h
+++ b/arch/x86/include/asm/entry-common.h
@@ -97,4 +97,6 @@ static __always_inline void arch_exit_to_user_mode(void)
}
#define arch_exit_to_user_mode arch_exit_to_user_mode
+extern void x86_entry_from_kvm(unsigned int entry_type, unsigned int vector);
+
#endif
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index 260456588756..d95d8d196cd4 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -266,6 +266,14 @@ void __init idt_setup_early_pf(void)
idt_setup_from_table(idt_table, early_pf_idts,
ARRAY_SIZE(early_pf_idts), true);
}
+#else
+void idt_entry_from_kvm(unsigned int vector)
+{
+ if (vector == NMI_VECTOR)
+ idt_do_nmi_irqoff();
+ else
+ idt_do_interrupt_irqoff(gate_offset(idt_table + vector));
+}
#endif
static void __init idt_map_in_cea(void)
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 8a481dae9cae..ff1f254a0ef4 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -31,38 +31,6 @@
#define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE
#endif
-.macro VMX_DO_EVENT_IRQOFF call_insn call_target
- /*
- * Unconditionally create a stack frame, getting the correct RSP on the
- * stack (for x86-64) would take two instructions anyways, and RBP can
- * be used to restore RSP to make objtool happy (see below).
- */
- push %_ASM_BP
- mov %_ASM_SP, %_ASM_BP
-
-#ifdef CONFIG_X86_64
- /*
- * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
- * creating the synthetic interrupt stack frame for the IRQ/NMI.
- */
- and $-16, %rsp
- push $__KERNEL_DS
- push %rbp
-#endif
- pushf
- push $__KERNEL_CS
- \call_insn \call_target
-
- /*
- * "Restore" RSP from RBP, even though IRET has already unwound RSP to
- * the correct value. objtool doesn't know the callee will IRET and,
- * without the explicit restore, thinks the stack is getting walloped.
- * Using an unwind hint is problematic due to x86-64's dynamic alignment.
- */
- leave
- RET
-.endm
-
.section .noinstr.text, "ax"
/**
@@ -320,10 +288,6 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)
SYM_FUNC_END(__vmx_vcpu_run)
-SYM_FUNC_START(vmx_do_nmi_irqoff)
- VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
-SYM_FUNC_END(vmx_do_nmi_irqoff)
-
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
/**
@@ -375,13 +339,3 @@ SYM_FUNC_START(vmread_error_trampoline)
RET
SYM_FUNC_END(vmread_error_trampoline)
#endif
-
-.section .text, "ax"
-
-#ifndef CONFIG_X86_FRED
-
-SYM_FUNC_START(vmx_do_interrupt_irqoff)
- VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
-SYM_FUNC_END(vmx_do_interrupt_irqoff)
-
-#endif
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index a29896a9ef14..f6f5c124ed3b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7127,17 +7127,9 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu,
"unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
- /*
- * Invoke the kernel's IRQ handler for the vector. Use the FRED path
- * when it's available even if FRED isn't fully enabled, e.g. even if
- * FRED isn't supported in hardware, in order to avoid the indirect
- * CALL in the non-FRED path.
- */
+ /* For the IRQ to the core kernel for processing. */
kvm_before_interrupt(vcpu, KVM_HANDLING_IRQ);
- if (IS_ENABLED(CONFIG_X86_FRED))
- fred_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
- else
- vmx_do_interrupt_irqoff(gate_offset((gate_desc *)host_idt_base + vector));
+ x86_entry_from_kvm(EVENT_TYPE_EXTINT, vector);
kvm_after_interrupt(vcpu);
vcpu->arch.at_instruction_boundary = true;
@@ -7447,10 +7439,7 @@ noinstr void vmx_handle_nmi(struct kvm_vcpu *vcpu)
return;
kvm_before_interrupt(vcpu, KVM_HANDLING_NMI);
- if (cpu_feature_enabled(X86_FEATURE_FRED))
- fred_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
- else
- vmx_do_nmi_irqoff();
+ x86_entry_from_kvm(EVENT_TYPE_NMI, NMI_VECTOR);
kvm_after_interrupt(vcpu);
}
next prev parent reply other threads:[~2026-04-21 20:06 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-16 20:50 CPU Lockups in KVM with deferred hrtimer rearming Verma, Vishal L
2026-04-20 15:00 ` Thomas Gleixner
2026-04-20 15:22 ` Thomas Gleixner
2026-04-20 20:57 ` Verma, Vishal L
2026-04-20 22:19 ` Thomas Gleixner
2026-04-20 22:24 ` Verma, Vishal L
2026-04-21 6:29 ` Thomas Gleixner
2026-04-21 4:51 ` Binbin Wu
2026-04-21 7:39 ` Thomas Gleixner
2026-04-21 11:18 ` Peter Zijlstra
2026-04-21 11:32 ` Peter Zijlstra
2026-04-21 11:34 ` Peter Zijlstra
2026-04-21 11:49 ` Peter Zijlstra
2026-04-21 12:05 ` Peter Zijlstra
2026-04-21 13:19 ` Peter Zijlstra
2026-04-21 13:29 ` Peter Zijlstra
2026-04-21 16:36 ` Thomas Gleixner
2026-04-21 18:11 ` Verma, Vishal L
2026-04-21 17:11 ` Thomas Gleixner
2026-04-21 17:20 ` Jim Mattson
2026-04-21 18:29 ` Thomas Gleixner
2026-04-21 18:55 ` Sean Christopherson
2026-04-21 20:06 ` Peter Zijlstra [this message]
2026-04-21 20:46 ` Peter Zijlstra
2026-04-21 20:57 ` Sean Christopherson
2026-04-21 21:02 ` Peter Zijlstra
2026-04-21 21:42 ` Sean Christopherson
2026-04-21 20:39 ` Paolo Bonzini
2026-04-21 21:02 ` Sean Christopherson
2026-04-21 21:49 ` Thomas Gleixner
2026-04-21 22:07 ` Sean Christopherson
2026-04-21 22:24 ` Paolo Bonzini
2026-04-21 19:18 ` Verma, Vishal L
2026-04-21 16:30 ` Thomas Gleixner
2026-04-21 16:11 ` Verma, Vishal L
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260421200620.GK3126523@noisy.programming.kicks-ass.net \
--to=peterz@infradead.org \
--cc=binbin.wu@intel.com \
--cc=binbin.wu@linux.intel.com \
--cc=bonzini@redhat.com \
--cc=jmattson@google.com \
--cc=kvm@vger.kernel.org \
--cc=rick.p.edgecombe@intel.com \
--cc=seanjc@google.com \
--cc=tglx@kernel.org \
--cc=vishal.l.verma@intel.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox