public inbox for linux-pm@vger.kernel.org
 help / color / mirror / Atom feed
From: "Xin Li (Intel)" <xin@zytor.com>
To: linux-kernel@vger.kernel.org, kvm@vger.kernel.org,
	linux-pm@vger.kernel.org
Cc: seanjc@google.com, pbonzini@redhat.com, tglx@linutronix.de,
	mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com,
	x86@kernel.org, hpa@zytor.com, rafael@kernel.org,
	pavel@kernel.org, brgerst@gmail.com, xin@zytor.com,
	david.kaplan@amd.com, peterz@infradead.org,
	andrew.cooper3@citrix.com, kprateek.nayak@amd.com,
	arjan@linux.intel.com, chao.gao@intel.com,
	rick.p.edgecombe@intel.com, dan.j.williams@intel.com
Subject: [RFC PATCH v1 3/5] x86/shutdown, KVM: VMX: Move VMCLEAR of VMCSs to cpu_disable_virtualization()
Date: Tue,  9 Sep 2025 11:28:23 -0700	[thread overview]
Message-ID: <20250909182828.1542362-4-xin@zytor.com> (raw)
In-Reply-To: <20250909182828.1542362-1-xin@zytor.com>

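Relocate the VMCLEAR of loaded VMCSs from KVM to cpu_disable_virtualization()
in the x86 core code (arch/x86/kernel/cpu/common.c).  To make that possible,
move the per-CPU list of loaded VMCSs into common.c, expose it to KVM via
get_loaded_vmcss_on_cpu(), and have the core code walk the list through
struct loaded_vmcs_basic, which mirrors the first three members of KVM's
struct loaded_vmcs.

This eliminates the need to call cpu_emergency_disable_virtualization()
before cpu_disable_virtualization() and prepares for removing the emergency
reboot callback that calls into KVM from the CPU reboot path.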

Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
 arch/x86/include/asm/processor.h |  1 +
 arch/x86/kernel/cpu/common.c     | 34 ++++++++++++++++++++++++++++++++
 arch/x86/kernel/crash.c          |  3 ---
 arch/x86/kernel/reboot.c         |  7 +++----
 arch/x86/kernel/smp.c            |  6 ------
 arch/x86/kvm/vmx/vmcs.h          |  5 ++++-
 arch/x86/kvm/vmx/vmx.c           | 34 +++-----------------------------
 7 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0bfd4eb1e9e2..d8a28c57248d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -230,6 +230,7 @@ void init_cpu_devs(void);
 void get_cpu_vendor(struct cpuinfo_x86 *c);
 extern void early_cpu_init(void);
 extern void identify_secondary_cpu(unsigned int cpu);
+extern struct list_head* get_loaded_vmcss_on_cpu(int cpu);
 extern void cpu_enable_virtualization(void);
 extern void cpu_disable_virtualization(void);
 extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 39b9be9a2fb1..73abacf57ed4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1950,6 +1950,18 @@ union vmxon_vmcs {
 };
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(union vmxon_vmcs, vmxon_vmcs);
+/*
+ * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
+ * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
+ */
+static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
+
+/* Export an accessor rather than the raw data */
+struct list_head* get_loaded_vmcss_on_cpu(int cpu)
+{
+	return &per_cpu(loaded_vmcss_on_cpu, cpu);
+}
+EXPORT_SYMBOL_GPL(get_loaded_vmcss_on_cpu);
 
 /*
  * Executed during the CPU startup phase to execute VMXON to enable VMX. This
@@ -1975,6 +1987,8 @@ void cpu_enable_virtualization(void)
 		return;
 	}
 
+	INIT_LIST_HEAD(get_loaded_vmcss_on_cpu(cpu));
+
 	memset(this_cpu_ptr(&vmxon_vmcs), 0, PAGE_SIZE);
 
 	/*
@@ -2002,6 +2016,18 @@ void cpu_enable_virtualization(void)
 	intel_pt_handle_vmx(0);
 }
 
+static __always_inline void vmclear(void *p)
+{
+	u64 pa = __pa(p);
+	asm volatile ("vmclear %0" : : "m"(pa) : "cc");
+}
+
+struct loaded_vmcs_basic {
+	struct list_head loaded_vmcss_on_cpu_link;
+	struct vmcs_hdr *vmcs;
+	struct vmcs_hdr *shadow_vmcs;
+};
+
 /*
  * Because INIT interrupts are blocked during VMX operation, this function
  * must be called just before a CPU shuts down to ensure it can be brought
@@ -2016,6 +2042,7 @@ void cpu_enable_virtualization(void)
 void cpu_disable_virtualization(void)
 {
 	int cpu = raw_smp_processor_id();
+	struct loaded_vmcs_basic *v;
 
 	if (!is_vmx_supported())
 		return;
@@ -2025,6 +2052,13 @@ void cpu_disable_virtualization(void)
 		return;
 	}
 
+	list_for_each_entry(v, get_loaded_vmcss_on_cpu(cpu),
+			    loaded_vmcss_on_cpu_link) {
+		vmclear(v->vmcs);
+		if (v->shadow_vmcs)
+			vmclear(v->shadow_vmcs);
+	}
+
 	asm goto("1: vmxoff\n\t"
 		 _ASM_EXTABLE(1b, %l[fault])
 		 ::: "cc", "memory" : fault);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 772c6d350b50..e5b374587be2 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -111,9 +111,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
 
 	crash_smp_send_stop();
 
-	/* Kept to VMCLEAR loaded VMCSs */
-	cpu_emergency_disable_virtualization();
-
 	/*
 	 * Disable Intel PT to stop its logging
 	 */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 7433e634018f..d8c3e2d8481f 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -633,7 +633,7 @@ static void native_machine_emergency_restart(void)
 	unsigned short mode;
 
 	if (reboot_emergency)
-		emergency_reboot_disable_virtualization();
+		nmi_shootdown_cpus_on_restart();
 
 	tboot_shutdown(TB_SHUTDOWN_REBOOT);
 
@@ -876,9 +876,6 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
 	if (shootdown_callback)
 		shootdown_callback(cpu, regs);
 
-	/* Kept to VMCLEAR loaded VMCSs */
-	cpu_emergency_disable_virtualization();
-
 	atomic_dec(&waiting_for_crash_ipi);
 
 	/* Disable virtualization, usually this is an AP */
@@ -955,6 +952,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
 
 static inline void nmi_shootdown_cpus_on_restart(void)
 {
+	local_irq_disable();
+
 	if (!crash_ipi_issued)
 		nmi_shootdown_cpus(NULL);
 }
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index eb6a389ba1a9..b4f50c88e7e2 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -124,9 +124,6 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
 	if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
 		return NMI_HANDLED;
 
-	/* Kept to VMCLEAR loaded VMCSs */
-	cpu_emergency_disable_virtualization();
-
 	stop_this_cpu(NULL);
 
 	return NMI_HANDLED;
@@ -139,9 +136,6 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
 {
 	apic_eoi();
 
-	/* Kept to VMCLEAR loaded VMCSs */
-	cpu_emergency_disable_virtualization();
-
 	stop_this_cpu(NULL);
 }
 
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index da5631924432..10cbfd567dec 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -52,8 +52,12 @@ struct vmcs_controls_shadow {
  * Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
  * remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
  * loaded on this CPU (so we can clear them if the CPU goes down).
+ *
+ * Note: the first three members must be, in order, a list_head and two VMCS
+ * pointers; see struct loaded_vmcs_basic in arch/x86/kernel/cpu/common.c.
  */
 struct loaded_vmcs {
+	struct list_head loaded_vmcss_on_cpu_link;
 	struct vmcs *vmcs;
 	struct vmcs *shadow_vmcs;
 	int cpu;
@@ -65,7 +69,6 @@ struct loaded_vmcs {
 	ktime_t entry_time;
 	s64 vnmi_blocked_time;
 	unsigned long *msr_bitmap;
-	struct list_head loaded_vmcss_on_cpu_link;
 	struct vmcs_host_state host_state;
 	struct vmcs_controls_shadow controls_shadow;
 };
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 26af0a8ae08f..b033288e645a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -469,11 +469,6 @@ noinline void invept_error(unsigned long ext, u64 eptp)
 }
 
 DEFINE_PER_CPU(struct vmcs *, current_vmcs);
-/*
- * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
- * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
- */
-static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
 
 static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
 static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -676,26 +671,6 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
 
 void vmx_emergency_disable_virtualization_cpu(void)
 {
-	int cpu = raw_smp_processor_id();
-	struct loaded_vmcs *v;
-
-	kvm_rebooting = true;
-
-	/*
-	 * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
-	 * set in task context.  If this races with VMX is disabled by an NMI,
-	 * VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due to
-	 * kvm_rebooting set.
-	 */
-	if (!(__read_cr4() & X86_CR4_VMXE))
-		return;
-
-	list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
-			    loaded_vmcss_on_cpu_link) {
-		vmcs_clear(v->vmcs);
-		if (v->shadow_vmcs)
-			vmcs_clear(v->shadow_vmcs);
-	}
 }
 
 static void __loaded_vmcs_clear(void *arg)
@@ -1388,7 +1363,7 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
 		smp_rmb();
 
 		list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
-			 &per_cpu(loaded_vmcss_on_cpu, cpu));
+			 get_loaded_vmcss_on_cpu(cpu));
 		local_irq_enable();
 	}
 
@@ -2754,7 +2729,7 @@ static void vmclear_local_loaded_vmcss(void)
 	int cpu = raw_smp_processor_id();
 	struct loaded_vmcs *v, *n;
 
-	list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
+	list_for_each_entry_safe(v, n, get_loaded_vmcss_on_cpu(cpu),
 				 loaded_vmcss_on_cpu_link)
 		__loaded_vmcs_clear(v);
 }
@@ -8441,11 +8416,8 @@ int __init vmx_init(void)
 	if (r)
 		goto err_l1d_flush;
 
-	for_each_possible_cpu(cpu) {
-		INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
-
+	for_each_possible_cpu(cpu)
 		pi_init_cpu(cpu);
-	}
 
 	vmx_check_vmcs12_offsets();
 
-- 
2.51.0


  parent reply	other threads:[~2025-09-09 18:31 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-09-09 18:28 [RFC PATCH v1 0/5] x86/boot, KVM: Move VMXON/VMXOFF handling from KVM to CPU lifecycle Xin Li (Intel)
2025-09-09 18:28 ` [RFC PATCH v1 1/5] x86/boot: Shift VMXON from KVM init to CPU startup phase Xin Li (Intel)
2025-09-10  5:37   ` Adrian Hunter
2025-09-10  7:25   ` Chao Gao
2025-09-11  6:57     ` Xin Li
2025-09-10  8:02   ` Huang, Kai
2025-09-10 11:10     ` Chao Gao
2025-09-10 11:35       ` Huang, Kai
2025-09-10 13:13         ` Arjan van de Ven
2025-09-10 20:52           ` Huang, Kai
2025-09-09 18:28 ` [RFC PATCH v1 2/5] x86/boot: Move VMXOFF from KVM teardown to CPU shutdown phase Xin Li (Intel)
2025-09-09 18:28 ` Xin Li (Intel) [this message]
2025-09-09 18:28 ` [RFC PATCH v1 4/5] x86/reboot: Remove emergency_reboot_disable_virtualization() Xin Li (Intel)
2025-09-09 18:28 ` [RFC PATCH v1 5/5] KVM: Remove kvm_rebooting and its references Xin Li (Intel)
2025-09-16 17:56   ` Sean Christopherson
2025-09-17 16:51     ` Xin Li
2025-09-17 23:02       ` Sean Christopherson
2025-09-11 14:20 ` [RFC PATCH v1 0/5] x86/boot, KVM: Move VMXON/VMXOFF handling from KVM to CPU lifecycle Sean Christopherson
2025-09-11 15:20   ` Dave Hansen
2025-09-16 17:29     ` Sean Christopherson
2025-09-11 17:04   ` Arjan van de Ven
2025-09-16 17:54     ` Sean Christopherson
2025-09-16 18:25       ` Jim Mattson
2025-09-17 13:48       ` Arjan van de Ven
2025-09-17 17:30       ` Xin Li
2025-09-17 22:40         ` Sean Christopherson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250909182828.1542362-4-xin@zytor.com \
    --to=xin@zytor.com \
    --cc=andrew.cooper3@citrix.com \
    --cc=arjan@linux.intel.com \
    --cc=bp@alien8.de \
    --cc=brgerst@gmail.com \
    --cc=chao.gao@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=david.kaplan@amd.com \
    --cc=hpa@zytor.com \
    --cc=kprateek.nayak@amd.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pm@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pavel@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rafael@kernel.org \
    --cc=rick.p.edgecombe@intel.com \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox