public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Lai Jiangshan <jiangshanlai@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Lai Jiangshan <jiangshan.ljs@antgroup.com>,
	Hou Wenlong <houwenlong.hwl@antgroup.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Sean Christopherson <seanjc@google.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Borislav Petkov <bp@alien8.de>, Ingo Molnar <mingo@redhat.com>,
	kvm@vger.kernel.org, Paolo Bonzini <pbonzini@redhat.com>,
	x86@kernel.org, Kees Cook <keescook@chromium.org>,
	Juergen Gross <jgross@suse.com>,
	Dave Hansen <dave.hansen@linux.intel.com>,
	"H. Peter Anvin" <hpa@zytor.com>
Subject: [RFC PATCH 17/73] KVM: x86/PVM: Implement module initialization related callbacks
Date: Mon, 26 Feb 2024 22:35:34 +0800	[thread overview]
Message-ID: <20240226143630.33643-18-jiangshanlai@gmail.com> (raw)
In-Reply-To: <20240226143630.33643-1-jiangshanlai@gmail.com>

From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

Implement hardware enable/disable and setup/unsetup callbacks for PVM
module initialization.

Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
Signed-off-by: Hou Wenlong <houwenlong.hwl@antgroup.com>
---
 arch/x86/kvm/pvm/pvm.c | 226 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/pvm/pvm.h |  20 ++++
 2 files changed, 246 insertions(+)

diff --git a/arch/x86/kvm/pvm/pvm.c b/arch/x86/kvm/pvm/pvm.c
index 1dfa1ae57c8c..83aa2c9f42f6 100644
--- a/arch/x86/kvm/pvm/pvm.c
+++ b/arch/x86/kvm/pvm/pvm.c
@@ -9,18 +9,244 @@
  * the COPYING file in the top-level directory.
  *
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/module.h>
 
+#include <asm/pvm_para.h>
+
+#include "cpuid.h"
+#include "x86.h"
+#include "pvm.h"
+
 MODULE_AUTHOR("AntGroup");
 MODULE_LICENSE("GPL");
 
+static bool __read_mostly is_intel;
+
+static unsigned long host_idt_base;
+
+static void pvm_setup_mce(struct kvm_vcpu *vcpu)
+{
+}
+
+static bool pvm_has_emulated_msr(struct kvm *kvm, u32 index)
+{
+	switch (index) {
+	case MSR_IA32_MCG_EXT_CTL:
+	case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR:
+		return false;
+	case MSR_AMD64_VIRT_SPEC_CTRL:
+	case MSR_AMD64_TSC_RATIO:
+		/* These are AMD SVM only. */
+		return false;
+	case MSR_IA32_SMBASE:
+		/* Currently we only run the guest in long mode. */
+		return false;
+	default:
+		break;
+	}
+
+	return true;
+}
+
+static bool cpu_has_pvm_wbinvd_exit(void)
+{
+	return true;
+}
+
+static int hardware_enable(void)
+{
+	/* Nothing to do */
+	return 0;
+}
+
+static void hardware_disable(void)
+{
+	/* Nothing to do */
+}
+
+static int pvm_check_processor_compat(void)
+{
+	/* Nothing to do */
+	return 0;
+}
+
+static __init void pvm_set_cpu_caps(void)
+{
+	if (boot_cpu_has(X86_FEATURE_NX))
+		kvm_enable_efer_bits(EFER_NX);
+	if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
+		kvm_enable_efer_bits(EFER_FFXSR);
+
+	kvm_set_cpu_caps();
+
+	/* Unloading kvm-intel.ko doesn't clean up kvm_caps.supported_mce_cap. */
+	kvm_caps.supported_mce_cap = MCG_CTL_P | MCG_SER_P;
+
+	kvm_caps.supported_xss = 0;
+
+	/* PVM supervisor mode runs on hardware ring3, so no XSAVES. */
+	kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
+
+	/*
+	 * PVM supervisor mode runs on hardware ring3, so SMEP and SMAP cannot
+	 * be supported directly through hardware.  But they can be emulated
+	 * through other hardware features when needed.
+	 */
+
+	/*
+	 * PVM doesn't support SMAP, but similar protection might be
+	 * emulated via PKU in the future.
+	 */
+	kvm_cpu_cap_clear(X86_FEATURE_SMAP);
+
+	/*
+	 * PVM doesn't support SMEP.  When NX is supported, the guest can use
+	 * NX on the user pagetable to emulate the same protection as SMEP.
+	 */
+	kvm_cpu_cap_clear(X86_FEATURE_SMEP);
+
+	/*
+	 * Unlike VMX/SVM, which can switch paging modes atomically, PVM
+	 * implements guest LA57 through host LA57 shadow paging.
+	 */
+	if (!pgtable_l5_enabled())
+		kvm_cpu_cap_clear(X86_FEATURE_LA57);
+
+	/*
+	 * Even if host PCID is not enabled, guest PCID can be enabled to
+	 * reduce heavy guest TLB flushing.  Guest CR4.PCIDE is not directly
+	 * mapped to the hardware and is virtualized by PVM, so it can be
+	 * enabled unconditionally.
+	 */
+	kvm_cpu_cap_set(X86_FEATURE_PCID);
+
+	/* Don't expose MSR_IA32_SPEC_CTRL to the guest. */
+	kvm_cpu_cap_clear(X86_FEATURE_SPEC_CTRL);
+	kvm_cpu_cap_clear(X86_FEATURE_AMD_STIBP);
+	kvm_cpu_cap_clear(X86_FEATURE_AMD_IBRS);
+	kvm_cpu_cap_clear(X86_FEATURE_AMD_SSBD);
+
+	/* The PVM hypervisor hasn't implemented LAM so far. */
+	kvm_cpu_cap_clear(X86_FEATURE_LAM);
+
+	/* Don't expose MSR_IA32_DEBUGCTLMSR related features. */
+	kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
+}
+
+static __init int hardware_setup(void)
+{
+	struct desc_ptr dt;
+
+	store_idt(&dt);
+	host_idt_base = dt.address;
+
+	pvm_set_cpu_caps();
+
+	kvm_configure_mmu(false, 0, 0, 0);
+
+	enable_apicv = 0;
+
+	return 0;
+}
+
+static void hardware_unsetup(void)
+{
+}
+
+struct kvm_x86_nested_ops pvm_nested_ops = {};
+
+static struct kvm_x86_ops pvm_x86_ops __initdata = {
+	.name = KBUILD_MODNAME,
+
+	.check_processor_compatibility = pvm_check_processor_compat,
+
+	.hardware_unsetup = hardware_unsetup,
+	.hardware_enable = hardware_enable,
+	.hardware_disable = hardware_disable,
+	.has_emulated_msr = pvm_has_emulated_msr,
+
+	.has_wbinvd_exit = cpu_has_pvm_wbinvd_exit,
+
+	.nested_ops = &pvm_nested_ops,
+
+	.setup_mce = pvm_setup_mce,
+};
+
+static struct kvm_x86_init_ops pvm_init_ops __initdata = {
+	.hardware_setup = hardware_setup,
+
+	.runtime_ops = &pvm_x86_ops,
+};
+
 static void pvm_exit(void)
 {
+	kvm_exit();
+	kvm_x86_vendor_exit();
+	host_mmu_destroy();
+	allow_smaller_maxphyaddr = false;
+	kvm_cpuid_vendor_signature = 0;
 }
 module_exit(pvm_exit);
 
+static int __init hardware_cap_check(void)
+{
+	/*
+	 * The switcher can't be used when KPTI is enabled.  See the comments
+	 * above SWITCHER_SAVE_AND_SWITCH_TO_HOST_CR3.
+	 */
+	if (boot_cpu_has(X86_FEATURE_PTI)) {
+		pr_warn("Support for host KPTI is not included yet.\n");
+		return -EOPNOTSUPP;
+	}
+	if (!boot_cpu_has(X86_FEATURE_FSGSBASE)) {
+		pr_warn("FSGSBASE is required per PVM specification.\n");
+		return -EOPNOTSUPP;
+	}
+	if (!boot_cpu_has(X86_FEATURE_RDTSCP)) {
+		pr_warn("RDTSCP is required to support for getcpu in guest vdso.\n");
+		return -EOPNOTSUPP;
+	}
+	if (!boot_cpu_has(X86_FEATURE_CX16)) {
+		pr_warn("CMPXCHG16B is required for guest.\n");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
 static int __init pvm_init(void)
 {
+	int r;
+
+	r = hardware_cap_check();
+	if (r)
+		return r;
+
+	r = host_mmu_init();
+	if (r)
+		return r;
+
+	is_intel = boot_cpu_data.x86_vendor == X86_VENDOR_INTEL;
+
+	r = kvm_x86_vendor_init(&pvm_init_ops);
+	if (r)
+		goto exit_host_mmu;
+
+	r = kvm_init(sizeof(struct vcpu_pvm), __alignof__(struct vcpu_pvm), THIS_MODULE);
+	if (r)
+		goto exit_vendor;
+
+	allow_smaller_maxphyaddr = true;
+	kvm_cpuid_vendor_signature = PVM_CPUID_SIGNATURE;
+
 	return 0;
+
+exit_vendor:
+	kvm_x86_vendor_exit();
+exit_host_mmu:
+	host_mmu_destroy();
+	return r;
 }
 module_init(pvm_init);
diff --git a/arch/x86/kvm/pvm/pvm.h b/arch/x86/kvm/pvm/pvm.h
index 7a3732986a6d..6149cf5975a4 100644
--- a/arch/x86/kvm/pvm/pvm.h
+++ b/arch/x86/kvm/pvm/pvm.h
@@ -2,6 +2,8 @@
 #ifndef __KVM_X86_PVM_H
 #define __KVM_X86_PVM_H
 
+#include <linux/kvm_host.h>
+
 #define PT_L4_SHIFT		39
 #define PT_L4_SIZE		(1UL << PT_L4_SHIFT)
 #define DEFAULT_RANGE_L4_SIZE	(32 * PT_L4_SIZE)
@@ -20,4 +22,22 @@ extern u64 *host_mmu_root_pgd;
 void host_mmu_destroy(void);
 int host_mmu_init(void);
 
+struct vcpu_pvm {
+	struct kvm_vcpu vcpu;
+};
+
+struct kvm_pvm {
+	struct kvm kvm;
+};
+
+static __always_inline struct kvm_pvm *to_kvm_pvm(struct kvm *kvm)
+{
+	return container_of(kvm, struct kvm_pvm, kvm);
+}
+
+static __always_inline struct vcpu_pvm *to_pvm(struct kvm_vcpu *vcpu)
+{
+	return container_of(vcpu, struct vcpu_pvm, vcpu);
+}
+
 #endif /* __KVM_X86_PVM_H */
-- 
2.19.1.6.gb485710b


  parent reply	other threads:[~2024-02-26 14:35 UTC|newest]

Thread overview: 82+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-02-26 14:35 [RFC PATCH 00/73] KVM: x86/PVM: Introduce a new hypervisor Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 01/73] KVM: Documentation: Add the specification for PVM Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 02/73] x86/ABI/PVM: Add PVM-specific ABI header file Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 03/73] x86/entry: Implement switcher for PVM VM enter/exit Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 04/73] x86/entry: Implement direct switching for the switcher Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 05/73] KVM: x86: Set 'vcpu->arch.exception.injected' as true before vendor callback Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 06/73] KVM: x86: Move VMX interrupt/nmi handling into kvm.ko Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 07/73] KVM: x86/mmu: Adapt shadow MMU for PVM Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 08/73] KVM: x86: Allow hypercall handling to not skip the instruction Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 09/73] KVM: x86: Add PVM virtual MSRs into emulated_msrs_all[] Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 10/73] KVM: x86: Introduce vendor feature to expose vendor-specific CPUID Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 11/73] KVM: x86: Implement gpc refresh for guest usage Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 12/73] KVM: x86: Add NR_VCPU_SREG in SREG enum Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 13/73] KVM: x86/emulator: Reinject #GP if instruction emulation failed for PVM Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 14/73] KVM: x86: Create stubs for PVM module as a new vendor Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 15/73] mm/vmalloc: Add a helper to reserve a contiguous and aligned kernel virtual area Lai Jiangshan
2024-02-27 14:56   ` Christoph Hellwig
2024-02-27 17:07     ` Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 16/73] KVM: x86/PVM: Implement host mmu initialization Lai Jiangshan
2024-02-26 14:35 ` Lai Jiangshan [this message]
2024-02-26 14:35 ` [RFC PATCH 18/73] KVM: x86/PVM: Implement VM/VCPU initialization related callbacks Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 19/73] x86/entry: Export 32-bit ignore syscall entry and __ia32_enabled variable Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 20/73] KVM: x86/PVM: Implement vcpu_load()/vcpu_put() related callbacks Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 21/73] KVM: x86/PVM: Implement vcpu_run() callbacks Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 22/73] KVM: x86/PVM: Handle some VM exits before enable interrupts Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 23/73] KVM: x86/PVM: Handle event handling related MSR read/write operation Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 24/73] KVM: x86/PVM: Introduce PVM mode switching Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 25/73] KVM: x86/PVM: Implement APIC emulation related callbacks Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 26/73] KVM: x86/PVM: Implement event delivery flags " Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 27/73] KVM: x86/PVM: Implement event injection " Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 28/73] KVM: x86/PVM: Handle syscall from user mode Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 29/73] KVM: x86/PVM: Implement allowed range checking for #PF Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 30/73] KVM: x86/PVM: Implement segment related callbacks Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 31/73] KVM: x86/PVM: Implement instruction emulation for #UD and #GP Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 32/73] KVM: x86/PVM: Enable guest debugging functions Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 33/73] KVM: x86/PVM: Handle VM-exit due to hardware exceptions Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 34/73] KVM: x86/PVM: Handle ERETU/ERETS synthetic instruction Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 35/73] KVM: x86/PVM: Handle PVM_SYNTHETIC_CPUID " Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 36/73] KVM: x86/PVM: Handle KVM hypercall Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 37/73] KVM: x86/PVM: Use host PCID to reduce guest TLB flushing Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 38/73] KVM: x86/PVM: Handle hypercalls for privilege instruction emulation Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 39/73] KVM: x86/PVM: Handle hypercall for CR3 switching Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 40/73] KVM: x86/PVM: Handle hypercall for loading GS selector Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 41/73] KVM: x86/PVM: Allow to load guest TLS in host GDT Lai Jiangshan
2024-02-26 14:35 ` [RFC PATCH 42/73] KVM: x86/PVM: Support for kvm_exit() tracepoint Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 43/73] KVM: x86/PVM: Enable direct switching Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 44/73] KVM: x86/PVM: Implement TSC related callbacks Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 45/73] KVM: x86/PVM: Add dummy PMU " Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 46/73] KVM: x86/PVM: Support for CPUID faulting Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 47/73] KVM: x86/PVM: Handle the left supported MSRs in msrs_to_save_base[] Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 48/73] KVM: x86/PVM: Implement system registers setting callbacks Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 49/73] KVM: x86/PVM: Implement emulation for non-PVM mode Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 50/73] x86/tools/relocs: Cleanup cmdline options Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 51/73] x86/tools/relocs: Append relocations into input file Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 52/73] x86/boot: Allow to do relocation for uncompressed kernel Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 53/73] x86/pvm: Add Kconfig option and the CPU feature bit for PVM guest Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 54/73] x86/pvm: Detect PVM hypervisor support Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 55/73] x86/pvm: Relocate kernel image to specific virtual address range Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 56/73] x86/pvm: Relocate kernel image early in PVH entry Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 57/73] x86/pvm: Make cpu entry area and vmalloc area variable Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 58/73] x86/pvm: Relocate kernel address space layout Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 59/73] x86/pti: Force enabling KPTI for PVM guest Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 60/73] x86/pvm: Add event entry/exit and dispatch code Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 61/73] x86/pvm: Allow to install a system interrupt handler Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 62/73] x86/pvm: Add early kernel event entry and dispatch code Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 63/73] x86/pvm: Add hypercall support Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 64/73] x86/pvm: Enable PVM event delivery Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 65/73] x86/kvm: Patch KVM hypercall as PVM hypercall Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 66/73] x86/pvm: Use new cpu feature to describe XENPV and PVM Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 67/73] x86/pvm: Implement cpu related PVOPS Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 68/73] x86/pvm: Implement irq " Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 69/73] x86/pvm: Implement mmu " Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 70/73] x86/pvm: Don't use SWAPGS for gsbase read/write Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 71/73] x86/pvm: Adapt pushf/popf in this_cpu_cmpxchg16b_emu() Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 72/73] x86/pvm: Use RDTSCP as default in vdso_read_cpunode() Lai Jiangshan
2024-02-26 14:36 ` [RFC PATCH 73/73] x86/pvm: Disable some unsupported syscalls and features Lai Jiangshan
2024-02-26 14:49 ` [RFC PATCH 00/73] KVM: x86/PVM: Introduce a new hypervisor Paolo Bonzini
2024-02-27 17:27   ` Sean Christopherson
2024-02-29  9:33     ` David Woodhouse
2024-03-01 14:00     ` Lai Jiangshan
2024-02-29 14:55   ` Lai Jiangshan
2024-03-06 11:05 ` Like Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240226143630.33643-18-jiangshanlai@gmail.com \
    --to=jiangshanlai@gmail.com \
    --cc=bp@alien8.de \
    --cc=dave.hansen@linux.intel.com \
    --cc=houwenlong.hwl@antgroup.com \
    --cc=hpa@zytor.com \
    --cc=jgross@suse.com \
    --cc=jiangshan.ljs@antgroup.com \
    --cc=keescook@chromium.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=peterz@infradead.org \
    --cc=seanjc@google.com \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox