From: Anish Ghulati <aghulati@google.com>
To: kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
Sean Christopherson <seanjc@google.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
Dave Hansen <dave.hansen@linux.intel.com>,
x86@kernel.org, hpa@zytor.com,
Vitaly Kuznetsov <vkuznets@redhat.com>,
peterz@infradead.org, paulmck@kernel.org,
Mark Rutland <mark.rutland@arm.com>
Cc: Anish Ghulati <aghulati@google.com>
Subject: [RFC PATCH 13/14] KVM: x86: VAC: Move all hardware enable/disable code into VAC
Date: Tue, 7 Nov 2023 20:20:01 +0000 [thread overview]
Message-ID: <20231107202002.667900-14-aghulati@google.com> (raw)
In-Reply-To: <20231107202002.667900-1-aghulati@google.com>
De-indirect hardware enable and disable. Now that all of these functions
are in the VAC, they don't need to be called via an indirect ops table and
static call.
Signed-off-by: Anish Ghulati <aghulati@google.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 2 -
arch/x86/kvm/svm/svm.c | 2 -
arch/x86/kvm/svm/svm_ops.h | 1 +
arch/x86/kvm/vac.c | 46 +++++++++++-
arch/x86/kvm/vac.h | 9 ++-
arch/x86/kvm/vmx/vmx.c | 2 -
arch/x86/kvm/vmx/vmx_ops.h | 1 +
arch/x86/kvm/x86.c | 117 -----------------------------
arch/x86/kvm/x86.h | 2 -
include/linux/kvm_host.h | 2 +
virt/kvm/vac.h | 5 ++
11 files changed, 58 insertions(+), 131 deletions(-)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 764be4a26a0c..340dcae9dd32 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -16,8 +16,6 @@ BUILD_BUG_ON(1)
*/
KVM_X86_OP(vendor_exit)
KVM_X86_OP(check_processor_compatibility)
-KVM_X86_OP(hardware_enable)
-KVM_X86_OP(hardware_disable)
KVM_X86_OP(hardware_unsetup)
KVM_X86_OP(has_emulated_msr)
KVM_X86_OP(vcpu_after_set_cpuid)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index df5673c98e7b..fb2c72430c7a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4739,8 +4739,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.check_processor_compatibility = svm_check_processor_compat,
.hardware_unsetup = svm_hardware_unsetup,
- .hardware_enable = svm_hardware_enable,
- .hardware_disable = svm_hardware_disable,
.has_emulated_msr = svm_has_emulated_msr,
.vcpu_create = svm_vcpu_create,
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 36c8af87a707..5e89c06b5147 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -4,6 +4,7 @@
#include <linux/compiler_types.h>
+#include "../vac.h"
#include "x86.h"
#define svm_asm(insn, clobber...) \
diff --git a/arch/x86/kvm/vac.c b/arch/x86/kvm/vac.c
index ab77aee4e1fa..79f5c2ac159a 100644
--- a/arch/x86/kvm/vac.c
+++ b/arch/x86/kvm/vac.c
@@ -3,6 +3,8 @@
#include "vac.h"
#include <asm/msr.h>
+extern bool kvm_rebooting;
+
u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
struct kvm_user_return_msrs __percpu *user_return_msrs;
@@ -35,7 +37,7 @@ void kvm_on_user_return(struct user_return_notifier *urn)
}
}
-void kvm_user_return_msr_cpu_online(void)
+static void kvm_user_return_msr_cpu_online(void)
{
unsigned int cpu = smp_processor_id();
struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
@@ -49,7 +51,7 @@ void kvm_user_return_msr_cpu_online(void)
}
}
-void drop_user_return_notifiers(void)
+static void drop_user_return_notifiers(void)
{
unsigned int cpu = smp_processor_id();
struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
@@ -117,6 +119,46 @@ int kvm_set_user_return_msr(unsigned int slot, u64 value, u64 mask)
return 0;
}
+int kvm_arch_hardware_enable(void)
+{
+ int ret = -EIO;
+
+ kvm_user_return_msr_cpu_online();
+
+ if (kvm_is_vmx_supported())
+ ret = vmx_hardware_enable();
+ else if (kvm_is_svm_supported())
+ ret = svm_hardware_enable();
+ if (ret != 0)
+ return ret;
+
+ // TODO: Handle unstable TSC
+
+ return 0;
+}
+
+void kvm_arch_hardware_disable(void)
+{
+ if (kvm_is_vmx_supported())
+ vmx_hardware_disable();
+ else if (kvm_is_svm_supported())
+ svm_hardware_disable();
+ drop_user_return_notifiers();
+}
+
+/*
+ * Handle a fault on a hardware virtualization (VMX or SVM) instruction.
+ *
+ * Hardware virtualization extension instructions may fault if a reboot turns
+ * off virtualization while processes are running. Usually after catching the
+ * fault we just panic; during reboot instead the instruction is ignored.
+ */
+noinstr void kvm_spurious_fault(void)
+{
+ /* Fault while not rebooting. We want the trace. */
+ BUG_ON(!kvm_rebooting);
+}
+
int kvm_alloc_user_return_msrs(void)
{
user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
diff --git a/arch/x86/kvm/vac.h b/arch/x86/kvm/vac.h
index 5be30cce5a1c..daf1f137d196 100644
--- a/arch/x86/kvm/vac.h
+++ b/arch/x86/kvm/vac.h
@@ -5,13 +5,14 @@
#include <linux/user-return-notifier.h>
-int __init vac_init(void);
-void vac_exit(void);
+void kvm_spurious_fault(void);
#ifdef CONFIG_KVM_INTEL
bool kvm_is_vmx_supported(void);
int __init vac_vmx_init(void);
void vac_vmx_exit(void);
+int vmx_hardware_enable(void);
+void vmx_hardware_disable(void);
#else
bool kvm_is_vmx_supported(void) { return false }
int __init vac_vmx_init(void)
@@ -25,6 +26,8 @@ void vac_vmx_exit(void) {}
bool kvm_is_svm_supported(void);
int __init vac_svm_init(void);
void vac_svm_exit(void);
+int svm_hardware_enable(void);
+void svm_hardware_disable(void);
#else
bool kvm_is_svm_supported(void) { return false }
int __init vac_svm_init(void)
@@ -59,8 +62,6 @@ int kvm_add_user_return_msr(u32 msr);
int kvm_find_user_return_msr(u32 msr);
int kvm_set_user_return_msr(unsigned int slot, u64 value, u64 mask);
void kvm_on_user_return(struct user_return_notifier *urn);
-void kvm_user_return_msr_cpu_online(void);
-void drop_user_return_notifiers(void);
static inline bool kvm_is_supported_user_return_msr(u32 msr)
{
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6301b49e0e80..69a6a8591996 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8013,8 +8013,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.hardware_unsetup = vmx_hardware_unsetup,
- .hardware_enable = vmx_hardware_enable,
- .hardware_disable = vmx_hardware_disable,
.has_emulated_msr = vmx_has_emulated_msr,
.vm_size = sizeof(struct kvm_vmx),
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 33af7b4c6eb4..60325fd39120 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -8,6 +8,7 @@
#include "hyperv.h"
#include "vmcs.h"
+#include "../vac.h"
#include "../x86.h"
void vmread_error(unsigned long field);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7466a5945147..a74139061e4d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -370,19 +370,6 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 0;
}
-/*
- * Handle a fault on a hardware virtualization (VMX or SVM) instruction.
- *
- * Hardware virtualization extension instructions may fault if a reboot turns
- * off virtualization while processes are running. Usually after catching the
- * fault we just panic; during reboot instead the instruction is ignored.
- */
-noinstr void kvm_spurious_fault(void)
-{
- /* Fault while not rebooting. We want the trace. */
- BUG_ON(!kvm_rebooting);
-}
-
#define EXCPT_BENIGN 0
#define EXCPT_CONTRIBUTORY 1
#define EXCPT_PF 2
@@ -9363,7 +9350,6 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
return 0;
out_unwind_ops:
- kvm_x86_ops.hardware_enable = NULL;
static_call(kvm_x86_hardware_unsetup)();
out_mmu_exit:
kvm_mmu_vendor_module_exit();
@@ -9414,7 +9400,6 @@ void kvm_x86_vendor_exit(void)
WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
#endif
mutex_lock(&vendor_module_lock);
- kvm_x86_ops.hardware_enable = NULL;
mutex_unlock(&vendor_module_lock);
}
@@ -11952,108 +11937,6 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
kvm_rip_write(vcpu, 0);
}
-int kvm_arch_hardware_enable(void)
-{
- struct kvm *kvm;
- struct kvm_vcpu *vcpu;
- unsigned long i;
- int ret;
- u64 local_tsc;
- u64 max_tsc = 0;
- bool stable, backwards_tsc = false;
-
- kvm_user_return_msr_cpu_online();
-
- ret = kvm_x86_check_processor_compatibility();
- if (ret)
- return ret;
-
- ret = static_call(kvm_x86_hardware_enable)();
- if (ret != 0)
- return ret;
-
- local_tsc = rdtsc();
- stable = !kvm_check_tsc_unstable();
- list_for_each_entry(kvm, &vm_list, vm_list) {
- kvm_for_each_vcpu(i, vcpu, kvm) {
- if (!stable && vcpu->cpu == smp_processor_id())
- kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- if (stable && vcpu->arch.last_host_tsc > local_tsc) {
- backwards_tsc = true;
- if (vcpu->arch.last_host_tsc > max_tsc)
- max_tsc = vcpu->arch.last_host_tsc;
- }
- }
- }
-
- /*
- * Sometimes, even reliable TSCs go backwards. This happens on
- * platforms that reset TSC during suspend or hibernate actions, but
- * maintain synchronization. We must compensate. Fortunately, we can
- * detect that condition here, which happens early in CPU bringup,
- * before any KVM threads can be running. Unfortunately, we can't
- * bring the TSCs fully up to date with real time, as we aren't yet far
- * enough into CPU bringup that we know how much real time has actually
- * elapsed; our helper function, ktime_get_boottime_ns() will be using boot
- * variables that haven't been updated yet.
- *
- * So we simply find the maximum observed TSC above, then record the
- * adjustment to TSC in each VCPU. When the VCPU later gets loaded,
- * the adjustment will be applied. Note that we accumulate
- * adjustments, in case multiple suspend cycles happen before some VCPU
- * gets a chance to run again. In the event that no KVM threads get a
- * chance to run, we will miss the entire elapsed period, as we'll have
- * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
- * loose cycle time. This isn't too big a deal, since the loss will be
- * uniform across all VCPUs (not to mention the scenario is extremely
- * unlikely). It is possible that a second hibernate recovery happens
- * much faster than a first, causing the observed TSC here to be
- * smaller; this would require additional padding adjustment, which is
- * why we set last_host_tsc to the local tsc observed here.
- *
- * N.B. - this code below runs only on platforms with reliable TSC,
- * as that is the only way backwards_tsc is set above. Also note
- * that this runs for ALL vcpus, which is not a bug; all VCPUs should
- * have the same delta_cyc adjustment applied if backwards_tsc
- * is detected. Note further, this adjustment is only done once,
- * as we reset last_host_tsc on all VCPUs to stop this from being
- * called multiple times (one for each physical CPU bringup).
- *
- * Platforms with unreliable TSCs don't have to deal with this, they
- * will be compensated by the logic in vcpu_load, which sets the TSC to
- * catchup mode. This will catchup all VCPUs to real time, but cannot
- * guarantee that they stay in perfect synchronization.
- */
- if (backwards_tsc) {
- u64 delta_cyc = max_tsc - local_tsc;
- list_for_each_entry(kvm, &vm_list, vm_list) {
- kvm->arch.backwards_tsc_observed = true;
- kvm_for_each_vcpu(i, vcpu, kvm) {
- vcpu->arch.tsc_offset_adjustment += delta_cyc;
- vcpu->arch.last_host_tsc = local_tsc;
- kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
- }
-
- /*
- * We have to disable TSC offset matching.. if you were
- * booting a VM while issuing an S4 host suspend....
- * you may have some problem. Solving this issue is
- * left as an exercise to the reader.
- */
- kvm->arch.last_tsc_nsec = 0;
- kvm->arch.last_tsc_write = 0;
- }
-
- }
- return 0;
-}
-
-void kvm_arch_hardware_disable(void)
-{
- static_call(kvm_x86_hardware_disable)();
- drop_user_return_notifiers();
-}
-
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
{
return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 1da8efcd3e9c..17ff3917b9a8 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -40,8 +40,6 @@ struct kvm_caps {
u64 supported_perf_cap;
};
-void kvm_spurious_fault(void);
-
#define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check) \
({ \
bool failed = (consistency_check); \
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f0afe549c0d6..d26671682764 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1467,9 +1467,11 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
#endif
#ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+#ifndef CONFIG_X86
int kvm_arch_hardware_enable(void);
void kvm_arch_hardware_disable(void);
#endif
+#endif
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/vac.h b/virt/kvm/vac.h
index f3e7b08168df..b5159fa3f18d 100644
--- a/virt/kvm/vac.h
+++ b/virt/kvm/vac.h
@@ -13,6 +13,11 @@ int kvm_offline_cpu(unsigned int cpu);
void hardware_disable_all(void);
int hardware_enable_all(void);
+#ifdef CONFIG_X86
+int kvm_arch_hardware_enable(void);
+void kvm_arch_hardware_disable(void);
+#endif
+
extern struct notifier_block kvm_reboot_notifier;
extern struct syscore_ops kvm_syscore_ops;
--
2.42.0.869.gea05f2083d-goog
next prev parent reply other threads:[~2023-11-07 20:20 UTC|newest]
Thread overview: 22+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-11-07 20:19 [RFC PATCH 00/14] Support multiple KVM modules on the same host Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 01/14] KVM: x86: Move common module params from SVM/VMX to x86 Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 02/14] KVM: x86: Fold x86 vendor modules into the main KVM modules Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 03/14] KVM: x86: Remove unused exports Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 04/14] KVM: x86: Create stubs for a new VAC module Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 05/14] KVM: x86: Refactor hardware enable/disable operations into a new file Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 06/14] KVM: x86: Move user return msr operations out of KVM Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 07/14] KVM: SVM: Move shared SVM data structures into VAC Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 08/14] KVM: VMX: Move shared VMX " Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 09/14] KVM: x86: Move shared KVM state " Anish Ghulati
2023-11-17 8:54 ` Lai Jiangshan
2023-11-28 18:01 ` Sean Christopherson
2023-11-07 20:19 ` [RFC PATCH 10/14] KVM: VMX: Move VMX enable and disable " Anish Ghulati
2023-11-07 20:19 ` [RFC PATCH 11/14] KVM: SVM: Move SVM " Anish Ghulati
2023-11-07 20:20 ` [RFC PATCH 12/14] KVM: x86: Move VMX and SVM support checks " Anish Ghulati
2023-11-07 20:20 ` Anish Ghulati [this message]
2023-11-07 20:20 ` [RFC PATCH 14/14] KVM: VAC: Bring up VAC as a new module Anish Ghulati
2023-11-17 8:53 ` [RFC PATCH 00/14] Support multiple KVM modules on the same host Lai Jiangshan
2023-11-28 18:10 ` Sean Christopherson
2026-01-05 7:48 ` Hou Wenlong
2026-01-07 15:54 ` Sean Christopherson
2026-01-08 6:55 ` Hou Wenlong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231107202002.667900-14-aghulati@google.com \
--to=aghulati@google.com \
--cc=bp@alien8.de \
--cc=dave.hansen@linux.intel.com \
--cc=hpa@zytor.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=paulmck@kernel.org \
--cc=pbonzini@redhat.com \
--cc=peterz@infradead.org \
--cc=seanjc@google.com \
--cc=tglx@linutronix.de \
--cc=vkuznets@redhat.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.