All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ben Gardon <bgardon@google.com>
To: kvm@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	Peter Feiner <pfeiner@google.com>,
	Peter Shier <pshier@google.com>,
	Junaid Shahid <junaids@google.com>,
	Jim Mattson <jmattson@google.com>,
	Ben Gardon <bgardon@google.com>
Subject: [RFC PATCH 08/28] kvm: mmu: Init / Uninit the direct MMU
Date: Thu, 26 Sep 2019 16:18:04 -0700	[thread overview]
Message-ID: <20190926231824.149014-9-bgardon@google.com> (raw)
In-Reply-To: <20190926231824.149014-1-bgardon@google.com>

The direct MMU introduces several new fields that need to be initialized
and torn down. Add functions to do that initialization / cleanup.

Signed-off-by: Ben Gardon <bgardon@google.com>
---
 arch/x86/include/asm/kvm_host.h |  51 ++++++++----
 arch/x86/kvm/mmu.c              | 132 +++++++++++++++++++++++++++++---
 arch/x86/kvm/x86.c              |  16 +++-
 3 files changed, 169 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 23edf56cf577c..1f8164c577d50 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -236,6 +236,22 @@ enum {
  */
 #define KVM_APIC_PV_EOI_PENDING	1
 
+#define HF_GIF_MASK		(1 << 0)
+#define HF_HIF_MASK		(1 << 1)
+#define HF_VINTR_MASK		(1 << 2)
+#define HF_NMI_MASK		(1 << 3)
+#define HF_IRET_MASK		(1 << 4)
+#define HF_GUEST_MASK		(1 << 5) /* VCPU is in guest-mode */
+#define HF_SMM_MASK		(1 << 6)
+#define HF_SMM_INSIDE_NMI_MASK	(1 << 7)
+
+#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+#define KVM_ADDRESS_SPACE_NUM 2
+
+#define kvm_arch_vcpu_memslots_id(vcpu) \
+		((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+
 struct kvm_kernel_irq_routing_entry;
 
 /*
@@ -940,6 +956,24 @@ struct kvm_arch {
 	bool exception_payload_enabled;
 
 	struct kvm_pmu_event_filter *pmu_event_filter;
+
+	/*
+	 * Whether the direct MMU is enabled for this VM. This contains a
+	 * snapshot of the direct MMU module parameter from when the VM was
+	 * created and remains unchanged for the life of the VM. If this is
+	 * true, direct MMU handler functions will run for various MMU
+	 * operations.
+	 */
+	bool direct_mmu_enabled;
+	/*
+	 * Indicates that the paging structure built by the direct MMU is
+	 * currently the only one in use. If nesting is used, prompting the
+	 * creation of shadow page tables for L2, this will be set to false.
+	 * While this is true, only direct MMU handlers will be run for many
+	 * MMU functions. Ignored if !direct_mmu_enabled.
+	 */
+	bool pure_direct_mmu;
+	hpa_t direct_root_hpa[KVM_ADDRESS_SPACE_NUM];
 };
 
 struct kvm_vm_stat {
@@ -1255,7 +1289,7 @@ void kvm_mmu_module_exit(void);
 
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
-void kvm_mmu_init_vm(struct kvm *kvm);
+int kvm_mmu_init_vm(struct kvm *kvm);
 void kvm_mmu_uninit_vm(struct kvm *kvm);
 void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
 		u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
@@ -1519,21 +1553,6 @@ enum {
 	TASK_SWITCH_GATE = 3,
 };
 
-#define HF_GIF_MASK		(1 << 0)
-#define HF_HIF_MASK		(1 << 1)
-#define HF_VINTR_MASK		(1 << 2)
-#define HF_NMI_MASK		(1 << 3)
-#define HF_IRET_MASK		(1 << 4)
-#define HF_GUEST_MASK		(1 << 5) /* VCPU is in guest-mode */
-#define HF_SMM_MASK		(1 << 6)
-#define HF_SMM_INSIDE_NMI_MASK	(1 << 7)
-
-#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
-#define KVM_ADDRESS_SPACE_NUM 2
-
-#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
-#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
-
 asmlinkage void kvm_spurious_fault(void);
 
 /*
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 50413f17c7cd0..788edbda02f69 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -47,6 +47,10 @@
 #include <asm/kvm_page_track.h>
 #include "trace.h"
 
+static bool __read_mostly direct_mmu_enabled;
+module_param_named(enable_direct_mmu, direct_mmu_enabled, bool,
+		   S_IRUGO | S_IWUSR);
+
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
  * where the hardware walks 2 page tables:
@@ -3754,27 +3758,56 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 	*root_hpa = INVALID_PAGE;
 }
 
+static bool is_direct_mmu_root(struct kvm *kvm, hpa_t root)
+{
+	int as_id;
+
+	for (as_id = 0; as_id < KVM_ADDRESS_SPACE_NUM; as_id++)
+		if (root == kvm->arch.direct_root_hpa[as_id])
+			return true;
+
+	return false;
+}
+
 /* roots_to_free must be some combination of the KVM_MMU_ROOT_* flags */
 void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			ulong roots_to_free)
 {
 	int i;
 	LIST_HEAD(invalid_list);
-	bool free_active_root = roots_to_free & KVM_MMU_ROOT_CURRENT;
 
 	BUILD_BUG_ON(KVM_MMU_NUM_PREV_ROOTS >= BITS_PER_LONG);
 
-	/* Before acquiring the MMU lock, see if we need to do any real work. */
-	if (!(free_active_root && VALID_PAGE(mmu->root_hpa))) {
-		for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
-			if ((roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) &&
-			    VALID_PAGE(mmu->prev_roots[i].hpa))
-				break;
+	/*
+	 * Direct MMU paging structures follow the life of the VM, so instead of
+	 * destroying direct MMU paging structure root, simply mark the root
+	 * HPA pointing to it as invalid.
+	 */
+	if (vcpu->kvm->arch.direct_mmu_enabled &&
+	    roots_to_free & KVM_MMU_ROOT_CURRENT &&
+	    is_direct_mmu_root(vcpu->kvm, mmu->root_hpa))
+		mmu->root_hpa = INVALID_PAGE;
 
-		if (i == KVM_MMU_NUM_PREV_ROOTS)
-			return;
+	if (!VALID_PAGE(mmu->root_hpa))
+		roots_to_free &= ~KVM_MMU_ROOT_CURRENT;
+
+	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+		if (roots_to_free & KVM_MMU_ROOT_PREVIOUS(i)) {
+			if (is_direct_mmu_root(vcpu->kvm,
+					       mmu->prev_roots[i].hpa))
+				mmu->prev_roots[i].hpa = INVALID_PAGE;
+			if (!VALID_PAGE(mmu->prev_roots[i].hpa))
+				roots_to_free &= ~KVM_MMU_ROOT_PREVIOUS(i);
+		}
 	}
 
+	/*
+	 * If there are no valid roots that need freeing at this point, avoid
+	 * acquiring the MMU lock and return.
+	 */
+	if (!roots_to_free)
+		return;
+
 	write_lock(&vcpu->kvm->mmu_lock);
 
 	for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++)
@@ -3782,7 +3815,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 			mmu_free_root_page(vcpu->kvm, &mmu->prev_roots[i].hpa,
 					   &invalid_list);
 
-	if (free_active_root) {
+	if (roots_to_free & KVM_MMU_ROOT_CURRENT) {
 		if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL &&
 		    (mmu->root_level >= PT64_ROOT_4LEVEL || mmu->direct_map)) {
 			mmu_free_root_page(vcpu->kvm, &mmu->root_hpa,
@@ -3820,7 +3853,12 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
 	struct kvm_mmu_page *sp;
 	unsigned i;
 
-	if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
+	if (vcpu->kvm->arch.direct_mmu_enabled) {
+		// TODO: Support 5 level paging in the direct MMU
+		BUG_ON(vcpu->arch.mmu->shadow_root_level > PT64_ROOT_4LEVEL);
+		vcpu->arch.mmu->root_hpa = vcpu->kvm->arch.direct_root_hpa[
+			kvm_arch_vcpu_memslots_id(vcpu)];
+	} else if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) {
 		write_lock(&vcpu->kvm->mmu_lock);
 		if(make_mmu_pages_available(vcpu) < 0) {
 			write_unlock(&vcpu->kvm->mmu_lock);
@@ -3863,6 +3901,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
 	gfn_t root_gfn, root_cr3;
 	int i;
 
+	write_lock(&vcpu->kvm->mmu_lock);
+	vcpu->kvm->arch.pure_direct_mmu = false;
+	write_unlock(&vcpu->kvm->mmu_lock);
+
 	root_cr3 = vcpu->arch.mmu->get_cr3(vcpu);
 	root_gfn = root_cr3 >> PAGE_SHIFT;
 
@@ -5710,6 +5752,64 @@ void kvm_disable_tdp(void)
 }
 EXPORT_SYMBOL_GPL(kvm_disable_tdp);
 
+static bool is_direct_mmu_enabled(void)
+{
+	if (!READ_ONCE(direct_mmu_enabled))
+		return false;
+
+	if (WARN_ONCE(!tdp_enabled,
+		      "Creating a VM with direct MMU enabled requires TDP."))
+		return false;
+
+	return true;
+}
+
+static int kvm_mmu_init_direct_mmu(struct kvm *kvm)
+{
+	struct page *page;
+	int i;
+
+	if (!is_direct_mmu_enabled())
+		return 0;
+
+	/*
+	 * Allocate the direct MMU root pages. These pages follow the life of
+	 * the VM.
+	 */
+	for (i = 0; i < ARRAY_SIZE(kvm->arch.direct_root_hpa); i++) {
+		page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+		if (!page)
+			goto err;
+		kvm->arch.direct_root_hpa[i] = page_to_phys(page);
+	}
+
+	/* This should not be changed for the lifetime of the VM. */
+	kvm->arch.direct_mmu_enabled = true;
+
+	kvm->arch.pure_direct_mmu = true;
+	return 0;
+err:
+	for (i = 0; i < ARRAY_SIZE(kvm->arch.direct_root_hpa); i++) {
+		if (kvm->arch.direct_root_hpa[i] &&
+		    VALID_PAGE(kvm->arch.direct_root_hpa[i]))
+			free_page((unsigned long)kvm->arch.direct_root_hpa[i]);
+		kvm->arch.direct_root_hpa[i] = INVALID_PAGE;
+	}
+	return -ENOMEM;
+}
+
+static void kvm_mmu_uninit_direct_mmu(struct kvm *kvm)
+{
+	int i;
+
+	if (!kvm->arch.direct_mmu_enabled)
+		return;
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+		handle_disconnected_pt(kvm, i, 0,
+			(kvm_pfn_t)(kvm->arch.direct_root_hpa[i] >> PAGE_SHIFT),
+			PT64_ROOT_4LEVEL);
+}
 
 /* The return value indicates if tlb flush on all vcpus is needed. */
 typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
@@ -5956,13 +6056,19 @@ static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
 	kvm_mmu_zap_all_fast(kvm);
 }
 
-void kvm_mmu_init_vm(struct kvm *kvm)
+int kvm_mmu_init_vm(struct kvm *kvm)
 {
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
+	int r;
+
+	r = kvm_mmu_init_direct_mmu(kvm);
+	if (r)
+		return r;
 
 	node->track_write = kvm_mmu_pte_write;
 	node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
 	kvm_page_track_register_notifier(kvm, node);
+	return 0;
 }
 
 void kvm_mmu_uninit_vm(struct kvm *kvm)
@@ -5970,6 +6076,8 @@ void kvm_mmu_uninit_vm(struct kvm *kvm)
 	struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
 
 	kvm_page_track_unregister_notifier(kvm, node);
+
+	kvm_mmu_uninit_direct_mmu(kvm);
 }
 
 void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9ecf83da396c9..2972b6c6029fb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9421,6 +9421,8 @@ void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
+	int err;
+
 	if (type)
 		return -EINVAL;
 
@@ -9450,9 +9452,19 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	kvm_hv_init_vm(kvm);
 	kvm_page_track_init(kvm);
-	kvm_mmu_init_vm(kvm);
+	err = kvm_mmu_init_vm(kvm);
+	if (err)
+		return err;
+
+	err = kvm_x86_ops->vm_init(kvm);
+	if (err)
+		goto error;
+
+	return 0;
 
-	return kvm_x86_ops->vm_init(kvm);
+error:
+	kvm_mmu_uninit_vm(kvm);
+	return err;
 }
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
-- 
2.23.0.444.g18eeb5a265-goog


  parent reply	other threads:[~2019-09-26 23:18 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-09-26 23:17 [RFC PATCH 00/28] kvm: mmu: Rework the x86 TDP direct mapped case Ben Gardon
2019-09-26 23:17 ` [RFC PATCH 01/28] kvm: mmu: Separate generating and setting mmio ptes Ben Gardon
2019-11-27 18:15   ` Sean Christopherson
2019-09-26 23:17 ` [RFC PATCH 02/28] kvm: mmu: Separate pte generation from set_spte Ben Gardon
2019-11-27 18:25   ` Sean Christopherson
2019-09-26 23:17 ` [RFC PATCH 03/28] kvm: mmu: Zero page cache memory at allocation time Ben Gardon
2019-11-27 18:32   ` Sean Christopherson
2019-09-26 23:18 ` [RFC PATCH 04/28] kvm: mmu: Update the lpages stat atomically Ben Gardon
2019-11-27 18:39   ` Sean Christopherson
2019-12-06 20:10     ` Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 05/28] sched: Add cond_resched_rwlock Ben Gardon
2019-11-27 18:42   ` Sean Christopherson
2019-12-06 20:12     ` Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 06/28] kvm: mmu: Replace mmu_lock with a read/write lock Ben Gardon
2019-11-27 18:47   ` Sean Christopherson
2019-12-02 22:45     ` Sean Christopherson
2019-09-26 23:18 ` [RFC PATCH 07/28] kvm: mmu: Add functions for handling changed PTEs Ben Gardon
2019-11-27 19:04   ` Sean Christopherson
2019-09-26 23:18 ` Ben Gardon [this message]
2019-12-02 23:40   ` [RFC PATCH 08/28] kvm: mmu: Init / Uninit the direct MMU Sean Christopherson
2019-12-06 20:25     ` Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 09/28] kvm: mmu: Free direct MMU page table memory in an RCU callback Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 10/28] kvm: mmu: Flush TLBs before freeing direct MMU page table memory Ben Gardon
2019-12-02 23:46   ` Sean Christopherson
2019-12-06 20:31     ` Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 11/28] kvm: mmu: Optimize for freeing direct MMU PTs on teardown Ben Gardon
2019-12-02 23:54   ` Sean Christopherson
2019-09-26 23:18 ` [RFC PATCH 12/28] kvm: mmu: Set tlbs_dirty atomically Ben Gardon
2019-12-03  0:13   ` Sean Christopherson
2019-09-26 23:18 ` [RFC PATCH 13/28] kvm: mmu: Add an iterator for concurrent paging structure walks Ben Gardon
2019-12-03  2:15   ` Sean Christopherson
2019-12-18 18:25     ` Ben Gardon
2019-12-18 19:14       ` Sean Christopherson
2019-09-26 23:18 ` [RFC PATCH 14/28] kvm: mmu: Batch updates to the direct mmu disconnected list Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 15/28] kvm: mmu: Support invalidate_zap_all_pages Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 16/28] kvm: mmu: Add direct MMU page fault handler Ben Gardon
2020-01-08 17:20   ` Peter Xu
2020-01-08 18:15     ` Ben Gardon
2020-01-08 19:00       ` Peter Xu
2019-09-26 23:18 ` [RFC PATCH 17/28] kvm: mmu: Add direct MMU fast " Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 18/28] kvm: mmu: Add an hva range iterator for memslot GFNs Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 19/28] kvm: mmu: Make address space ID a property of memslots Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 20/28] kvm: mmu: Implement the invalidation MMU notifiers for the direct MMU Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 21/28] kvm: mmu: Integrate the direct mmu with the changed pte notifier Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 22/28] kvm: mmu: Implement access tracking for the direct MMU Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 23/28] kvm: mmu: Make mark_page_dirty_in_slot usable from outside kvm_main Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 24/28] kvm: mmu: Support dirty logging in the direct MMU Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 25/28] kvm: mmu: Support kvm_zap_gfn_range " Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 26/28] kvm: mmu: Integrate direct MMU with nesting Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 27/28] kvm: mmu: Lazily allocate rmap when direct MMU is enabled Ben Gardon
2019-09-26 23:18 ` [RFC PATCH 28/28] kvm: mmu: Support MMIO in the direct MMU Ben Gardon
2019-10-17 18:50 ` [RFC PATCH 00/28] kvm: mmu: Rework the x86 TDP direct mapped case Sean Christopherson
2019-10-18 13:42   ` Paolo Bonzini
2019-11-27 19:09 ` Sean Christopherson
2019-12-06 19:55   ` Ben Gardon
2019-12-06 19:57     ` Sean Christopherson
2019-12-06 20:42       ` Ben Gardon

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190926231824.149014-9-bgardon@google.com \
    --to=bgardon@google.com \
    --cc=jmattson@google.com \
    --cc=junaids@google.com \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=pfeiner@google.com \
    --cc=pshier@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.