From: Anish Ghulati <aghulati@google.com>
To: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
Sean Christopherson <seanjc@google.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
x86@kernel.org, hpa@zytor.com,
Vitaly Kuznetsov <vkuznets@redhat.com>,
peterz@infradead.org, paulmck@kernel.org,
Mark Rutland <mark.rutland@arm.com>
Cc: Anish Ghulati <aghulati@google.com>
Subject: [RFC PATCH 05/14] KVM: x86: Refactor hardware enable/disable operations into a new file
Date: Tue, 7 Nov 2023 20:19:53 +0000 [thread overview]
Message-ID: <20231107202002.667900-6-aghulati@google.com> (raw)
In-Reply-To: <20231107202002.667900-1-aghulati@google.com>
Move KVM's generic hardware enable/disable code out of kvm_main.c and
into vac.{h,c} as a first step towards building VAC, and all of the
system-wide virtualization support, as a separate module.
Defer moving arch code to future patches to keep the diff reasonable.
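No functional change intended.
(Reviewer note: the code is not moved verbatim. The new copy takes
vac_lock instead of kvm_lock, hardware_enable_all() no longer checks
system_state before enabling hardware, and the syscore .shutdown hook
(kvm_shutdown) is replaced by a reboot notifier (kvm_reboot) whose
registration is not part of this diff.)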
Signed-off-by: Anish Ghulati <aghulati@google.com>
---
virt/kvm/kvm_main.c | 197 +-------------------------------------------
virt/kvm/vac.c | 177 +++++++++++++++++++++++++++++++++++++++
virt/kvm/vac.h | 26 ++++++
3 files changed, 204 insertions(+), 196 deletions(-)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f585a159b4f5..fb50deaad3fd 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -59,6 +59,7 @@
#include "coalesced_mmio.h"
#include "async_pf.h"
#include "kvm_mm.h"
+#include "vac.h"
#include "vfio.h"
#include <trace/events/ipi.h>
@@ -140,8 +141,6 @@ static int kvm_no_compat_open(struct inode *inode, struct file *file)
#define KVM_COMPAT(c) .compat_ioctl = kvm_no_compat_ioctl, \
.open = kvm_no_compat_open
#endif
-static int hardware_enable_all(void);
-static void hardware_disable_all(void);
static void kvm_io_bus_destroy(struct kvm_io_bus *bus);
@@ -5167,200 +5166,6 @@ static struct miscdevice kvm_dev = {
&kvm_chardev_ops,
};
-#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
-__visible bool kvm_rebooting;
-EXPORT_SYMBOL_GPL(kvm_rebooting);
-
-static DEFINE_PER_CPU(bool, hardware_enabled);
-static int kvm_usage_count;
-
-static int __hardware_enable_nolock(void)
-{
- if (__this_cpu_read(hardware_enabled))
- return 0;
-
- if (kvm_arch_hardware_enable()) {
- pr_info("kvm: enabling virtualization on CPU%d failed\n",
- raw_smp_processor_id());
- return -EIO;
- }
-
- __this_cpu_write(hardware_enabled, true);
- return 0;
-}
-
-static void hardware_enable_nolock(void *failed)
-{
- if (__hardware_enable_nolock())
- atomic_inc(failed);
-}
-
-static int kvm_online_cpu(unsigned int cpu)
-{
- int ret = 0;
-
- /*
- * Abort the CPU online process if hardware virtualization cannot
- * be enabled. Otherwise running VMs would encounter unrecoverable
- * errors when scheduled to this CPU.
- */
- mutex_lock(&kvm_lock);
- if (kvm_usage_count)
- ret = __hardware_enable_nolock();
- mutex_unlock(&kvm_lock);
- return ret;
-}
-
-static void hardware_disable_nolock(void *junk)
-{
- /*
- * Note, hardware_disable_all_nolock() tells all online CPUs to disable
- * hardware, not just CPUs that successfully enabled hardware!
- */
- if (!__this_cpu_read(hardware_enabled))
- return;
-
- kvm_arch_hardware_disable();
-
- __this_cpu_write(hardware_enabled, false);
-}
-
-static int kvm_offline_cpu(unsigned int cpu)
-{
- mutex_lock(&kvm_lock);
- if (kvm_usage_count)
- hardware_disable_nolock(NULL);
- mutex_unlock(&kvm_lock);
- return 0;
-}
-
-static void hardware_disable_all_nolock(void)
-{
- BUG_ON(!kvm_usage_count);
-
- kvm_usage_count--;
- if (!kvm_usage_count)
- on_each_cpu(hardware_disable_nolock, NULL, 1);
-}
-
-static void hardware_disable_all(void)
-{
- cpus_read_lock();
- mutex_lock(&kvm_lock);
- hardware_disable_all_nolock();
- mutex_unlock(&kvm_lock);
- cpus_read_unlock();
-}
-
-static int hardware_enable_all(void)
-{
- atomic_t failed = ATOMIC_INIT(0);
- int r;
-
- /*
- * Do not enable hardware virtualization if the system is going down.
- * If userspace initiated a forced reboot, e.g. reboot -f, then it's
- * possible for an in-flight KVM_CREATE_VM to trigger hardware enabling
- * after kvm_reboot() is called. Note, this relies on system_state
- * being set _before_ kvm_reboot(), which is why KVM uses a syscore ops
- * hook instead of registering a dedicated reboot notifier (the latter
- * runs before system_state is updated).
- */
- if (system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF ||
- system_state == SYSTEM_RESTART)
- return -EBUSY;
-
- /*
- * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu()
- * is called, and so on_each_cpu() between them includes the CPU that
- * is being onlined. As a result, hardware_enable_nolock() may get
- * invoked before kvm_online_cpu(), which also enables hardware if the
- * usage count is non-zero. Disable CPU hotplug to avoid attempting to
- * enable hardware multiple times.
- */
- cpus_read_lock();
- mutex_lock(&kvm_lock);
-
- r = 0;
-
- kvm_usage_count++;
- if (kvm_usage_count == 1) {
- on_each_cpu(hardware_enable_nolock, &failed, 1);
-
- if (atomic_read(&failed)) {
- hardware_disable_all_nolock();
- r = -EBUSY;
- }
- }
-
- mutex_unlock(&kvm_lock);
- cpus_read_unlock();
-
- return r;
-}
-
-static void kvm_shutdown(void)
-{
- /*
- * Disable hardware virtualization and set kvm_rebooting to indicate
- * that KVM has asynchronously disabled hardware virtualization, i.e.
- * that relevant errors and exceptions aren't entirely unexpected.
- * Some flavors of hardware virtualization need to be disabled before
- * transferring control to firmware (to perform shutdown/reboot), e.g.
- * on x86, virtualization can block INIT interrupts, which are used by
- * firmware to pull APs back under firmware control. Note, this path
- * is used for both shutdown and reboot scenarios, i.e. neither name is
- * 100% comprehensive.
- */
- pr_info("kvm: exiting hardware virtualization\n");
- kvm_rebooting = true;
- on_each_cpu(hardware_disable_nolock, NULL, 1);
-}
-
-static int kvm_suspend(void)
-{
- /*
- * Secondary CPUs and CPU hotplug are disabled across the suspend/resume
- * callbacks, i.e. no need to acquire kvm_lock to ensure the usage count
- * is stable. Assert that kvm_lock is not held to ensure the system
- * isn't suspended while KVM is enabling hardware. Hardware enabling
- * can be preempted, but the task cannot be frozen until it has dropped
- * all locks (userspace tasks are frozen via a fake signal).
- */
- lockdep_assert_not_held(&kvm_lock);
- lockdep_assert_irqs_disabled();
-
- if (kvm_usage_count)
- hardware_disable_nolock(NULL);
- return 0;
-}
-
-static void kvm_resume(void)
-{
- lockdep_assert_not_held(&kvm_lock);
- lockdep_assert_irqs_disabled();
-
- if (kvm_usage_count)
- WARN_ON_ONCE(__hardware_enable_nolock());
-}
-
-static struct syscore_ops kvm_syscore_ops = {
- .suspend = kvm_suspend,
- .resume = kvm_resume,
- .shutdown = kvm_shutdown,
-};
-#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
-static int hardware_enable_all(void)
-{
- return 0;
-}
-
-static void hardware_disable_all(void)
-{
-
-}
-#endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
-
static void kvm_iodevice_destructor(struct kvm_io_device *dev)
{
if (dev->ops->destructor)
diff --git a/virt/kvm/vac.c b/virt/kvm/vac.c
index 18d2ae7d3e47..ff034a53af50 100644
--- a/virt/kvm/vac.c
+++ b/virt/kvm/vac.c
@@ -1,3 +1,180 @@
// SPDX-License-Identifier: GPL-2.0-only
#include "vac.h"
+
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/mutex.h>
+
+#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+DEFINE_MUTEX(vac_lock);
+
+__visible bool kvm_rebooting;
+EXPORT_SYMBOL_GPL(kvm_rebooting);
+
+static DEFINE_PER_CPU(bool, hardware_enabled);
+static int kvm_usage_count;
+
+static int __hardware_enable_nolock(void)
+{
+ if (__this_cpu_read(hardware_enabled))
+ return 0;
+
+ if (kvm_arch_hardware_enable()) {
+ pr_info("kvm: enabling virtualization on CPU%d failed\n",
+ raw_smp_processor_id());
+ return -EIO;
+ }
+
+ __this_cpu_write(hardware_enabled, true);
+ return 0;
+}
+
+static void hardware_enable_nolock(void *failed)
+{
+ if (__hardware_enable_nolock())
+ atomic_inc(failed);
+}
+
+int kvm_online_cpu(unsigned int cpu)
+{
+ int ret = 0;
+
+ /*
+ * Abort the CPU online process if hardware virtualization cannot
+ * be enabled. Otherwise running VMs would encounter unrecoverable
+ * errors when scheduled to this CPU.
+ */
+ mutex_lock(&vac_lock);
+ if (kvm_usage_count)
+ ret = __hardware_enable_nolock();
+ mutex_unlock(&vac_lock);
+ return ret;
+}
+
+static void hardware_disable_nolock(void *junk)
+{
+ /*
+ * Note, hardware_disable_all_nolock() tells all online CPUs to disable
+ * hardware, not just CPUs that successfully enabled hardware!
+ */
+ if (!__this_cpu_read(hardware_enabled))
+ return;
+
+ kvm_arch_hardware_disable();
+
+ __this_cpu_write(hardware_enabled, false);
+}
+
+int kvm_offline_cpu(unsigned int cpu)
+{
+ mutex_lock(&vac_lock);
+ if (kvm_usage_count)
+ hardware_disable_nolock(NULL);
+ mutex_unlock(&vac_lock);
+ return 0;
+}
+
+static void hardware_disable_all_nolock(void)
+{
+ BUG_ON(!kvm_usage_count);
+
+ kvm_usage_count--;
+ if (!kvm_usage_count)
+ on_each_cpu(hardware_disable_nolock, NULL, 1);
+}
+
+void hardware_disable_all(void)
+{
+ cpus_read_lock();
+ mutex_lock(&vac_lock);
+ hardware_disable_all_nolock();
+ mutex_unlock(&vac_lock);
+ cpus_read_unlock();
+}
+
+int hardware_enable_all(void)
+{
+ atomic_t failed = ATOMIC_INIT(0);
+ int r = 0;
+
+ /*
+ * When onlining a CPU, cpu_online_mask is set before kvm_online_cpu()
+ * is called, and so on_each_cpu() between them includes the CPU that
+ * is being onlined. As a result, hardware_enable_nolock() may get
+ * invoked before kvm_online_cpu(), which also enables hardware if the
+ * usage count is non-zero. Disable CPU hotplug to avoid attempting to
+ * enable hardware multiple times.
+ */
+ cpus_read_lock();
+ mutex_lock(&vac_lock);
+
+ kvm_usage_count++;
+ if (kvm_usage_count == 1) {
+ on_each_cpu(hardware_enable_nolock, &failed, 1);
+
+ if (atomic_read(&failed)) {
+ hardware_disable_all_nolock();
+ r = -EBUSY;
+ }
+ }
+
+ mutex_unlock(&vac_lock);
+ cpus_read_unlock();
+
+ return r;
+}
+
+static int kvm_reboot(struct notifier_block *notifier, unsigned long val,
+ void *v)
+{
+ /*
+ * Some (well, at least mine) BIOSes hang on reboot if
+ * in vmx root mode.
+ *
+ * Intel TXT also requires VMX to be off on all CPUs at system shutdown.
+ */
+ pr_info("kvm: exiting hardware virtualization\n");
+ kvm_rebooting = true;
+ on_each_cpu(hardware_disable_nolock, NULL, 1);
+ return NOTIFY_OK;
+}
+
+static int kvm_suspend(void)
+{
+ /*
+ * Secondary CPUs and CPU hotplug are disabled across the suspend/resume
+ * callbacks, i.e. no need to acquire vac_lock to ensure the usage count
+ * is stable. Assert that vac_lock is not held to ensure the system
+ * isn't suspended while KVM is enabling hardware. Hardware enabling
+ * can be preempted, but the task cannot be frozen until it has dropped
+ * all locks (userspace tasks are frozen via a fake signal).
+ */
+ lockdep_assert_not_held(&vac_lock);
+ lockdep_assert_irqs_disabled();
+
+ if (kvm_usage_count)
+ hardware_disable_nolock(NULL);
+ return 0;
+}
+
+static void kvm_resume(void)
+{
+ lockdep_assert_not_held(&vac_lock);
+ lockdep_assert_irqs_disabled();
+
+ if (kvm_usage_count)
+ WARN_ON_ONCE(__hardware_enable_nolock());
+}
+
+struct notifier_block kvm_reboot_notifier = {
+ .notifier_call = kvm_reboot,
+ .priority = 0,
+};
+
+struct syscore_ops kvm_syscore_ops = {
+ .suspend = kvm_suspend,
+ .resume = kvm_resume,
+};
+
+#endif
diff --git a/virt/kvm/vac.h b/virt/kvm/vac.h
index 8f7123a916c5..aed178a16bdb 100644
--- a/virt/kvm/vac.h
+++ b/virt/kvm/vac.h
@@ -3,4 +3,30 @@
#ifndef __KVM_VAC_H__
#define __KVM_VAC_H__
+#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+
+#include <linux/kvm_host.h>
+#include <linux/syscore_ops.h>
+
+int kvm_online_cpu(unsigned int cpu);
+int kvm_offline_cpu(unsigned int cpu);
+void hardware_disable_all(void);
+int hardware_enable_all(void);
+
+extern struct notifier_block kvm_reboot_notifier;
+
+extern struct syscore_ops kvm_syscore_ops;
+
+#else /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
+static inline int hardware_enable_all(void)
+{
+ return 0;
+}
+
+static inline void hardware_disable_all(void)
+{
+
+}
+#endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */
+
#endif
--
2.42.0.869.gea05f2083d-goog
next prev parent reply other threads:[~2023-11-07 20:20 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-07 20:19 [RFC PATCH 00/14] Support multiple KVM modules on the same host Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 01/14] KVM: x86: Move common module params from SVM/VMX to x86 Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 02/14] KVM: x86: Fold x86 vendor modules into the main KVM modules Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 03/14] KVM: x86: Remove unused exports Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 04/14] KVM: x86: Create stubs for a new VAC module Anish Ghulati
2023-11-07 20:19 ` Anish Ghulati [this message]
2023-11-07 20:19 ` [RFC PATCH 06/14] KVM: x86: Move user return msr operations out of KVM Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 07/14] KVM: SVM: Move shared SVM data structures into VAC Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 08/14] KVM: VMX: Move shared VMX " Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 09/14] KVM: x86: Move shared KVM state " Anish Ghulati
2023-11-17 8:54 ` Lai Jiangshan
2023-11-28 18:01 ` Sean Christopherson
2023-11-07 20:19 ` [RFC PATCH 10/14] KVM: VMX: Move VMX enable and disable " Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 11/14] KVM: SVM: Move SVM " Anish Ghulati
2023-11-07 20:20 ` [RFC PATCH 12/14] KVM: x86: Move VMX and SVM support checks " Anish Ghulati
2023-11-07 20:20 ` [RFC PATCH 13/14] KVM: x86: VAC: Move all hardware enable/disable code " Anish Ghulati
2023-11-07 20:20 ` [RFC PATCH 14/14] KVM: VAC: Bring up VAC as a new module Anish Ghulati
2023-11-17 8:53 ` [RFC PATCH 00/14] Support multiple KVM modules on the same host Lai Jiangshan
2023-11-28 18:10 ` Sean Christopherson
2026-01-05 7:48 ` Hou Wenlong
2026-01-07 15:54 ` Sean Christopherson
2026-01-08 6:55 ` Hou Wenlong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231107202002.667900-6-aghulati@google.com \
--to=aghulati@google.com \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=paulmck@kernel.org \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=seanjc@google.com \
--cc=tglx@linutronix.de \
--cc=vkuznets@redhat.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox