All of lore.kernel.org
 help / color / mirror / Atom feed
From: David Stevens <stevensd@chromium.org>
To: kvm@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
	Sean Christopherson <seanjc@google.com>,
	Vitaly Kuznetsov <vkuznets@redhat.com>,
	Wanpeng Li <wanpengli@tencent.com>,
	Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
	David Stevens <stevensd@chromium.org>
Subject: [PATCH 3/3] KVM: VMX: skip page tracking based on cpuid
Date: Tue, 21 Sep 2021 17:10:10 +0900	[thread overview]
Message-ID: <20210921081010.457591-4-stevensd@google.com> (raw)
In-Reply-To: <20210921081010.457591-1-stevensd@google.com>

From: David Stevens <stevensd@chromium.org>

Consider X86_FEATURE_VMX when determining whether or not page tracking
is necessary, since that flag can be used to control whether or not a
particular guest supports nested virtualization. When the flag is
toggled, it is necessary to free/allocate gfn_track arrays for any
already existing slots.

If cpuid is heterogeneous or is modified after KVM_RUN, page tracking
may not work properly. This may cause guest instability, as per the
caveat on KVM_SET_CPUID{,2}. Host stability should not be affected.

Signed-off-by: David Stevens <stevensd@chromium.org>
---
 arch/x86/include/asm/kvm_host.h       |  2 +-
 arch/x86/include/asm/kvm_page_track.h |  2 +
 arch/x86/kvm/cpuid.c                  | 55 +++++++++++++++++++++------
 arch/x86/kvm/mmu/page_track.c         | 49 ++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c                |  4 +-
 arch/x86/kvm/vmx/vmx.c                |  7 +++-
 6 files changed, 104 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ba29f64d16d3..dc732ac0eb56 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1302,7 +1302,7 @@ struct kvm_x86_ops {
 	void (*hardware_unsetup)(void);
 	bool (*cpu_has_accelerated_tpr)(void);
 	bool (*has_emulated_msr)(struct kvm *kvm, u32 index);
-	void (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
+	int (*vcpu_after_set_cpuid)(struct kvm_vcpu *vcpu);
 	bool (*nested_enabled)(void);
 
 	unsigned int vm_size;
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index 92e9e2c74219..f917cea1b4b1 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -49,6 +49,8 @@ struct kvm_page_track_notifier_node {
 void kvm_page_track_init(struct kvm *kvm, bool mmu_uses_page_tracking);
 void kvm_page_track_cleanup(struct kvm *kvm);
 
+int kvm_page_tracking_mmu_enable(struct kvm *kvm, bool enable);
+
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot);
 int kvm_page_track_create_memslot(struct kvm *kvm,
 				  struct kvm_memory_slot *slot,
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 739be5da3bca..fb011b0e76e6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -150,10 +150,11 @@ void kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_update_cpuid_runtime);
 
-static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+static int kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	struct kvm_cpuid_entry2 *best;
+	int ret;
 
 	best = kvm_find_cpuid_entry(vcpu, 1, 0);
 	if (best && apic) {
@@ -198,14 +199,20 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 	kvm_hv_set_cpuid(vcpu);
 
-	/* Invoke the vendor callback only after the above state is updated. */
-	static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
+	/*
+	 * Invoke the vendor callback only after the above state is updated. If
+	 * it fails, continue the function and let the caller roll back to the
+	 * previous cpuid.
+	 */
+	ret = static_call(kvm_x86_vcpu_after_set_cpuid)(vcpu);
 
 	/*
 	 * Except for the MMU, which needs to do its thing any vendor specific
 	 * adjustments to the reserved GPA bits.
 	 */
 	kvm_mmu_after_set_cpuid(vcpu);
+
+	return ret;
 }
 
 static int is_efer_nx(void)
@@ -261,9 +268,9 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 			     struct kvm_cpuid *cpuid,
 			     struct kvm_cpuid_entry __user *entries)
 {
-	int r, i;
+	int r, i, orig_nent;
 	struct kvm_cpuid_entry *e = NULL;
-	struct kvm_cpuid_entry2 *e2 = NULL;
+	struct kvm_cpuid_entry2 *e2 = NULL, *orig_e2;
 
 	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
 		return -E2BIG;
@@ -298,13 +305,25 @@ int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
 		goto out_free_cpuid;
 	}
 
-	kvfree(vcpu->arch.cpuid_entries);
+	orig_e2 = vcpu->arch.cpuid_entries;
+	orig_nent = vcpu->arch.cpuid_nent;
+
 	vcpu->arch.cpuid_entries = e2;
 	vcpu->arch.cpuid_nent = cpuid->nent;
 
 	cpuid_fix_nx_cap(vcpu);
 	kvm_update_cpuid_runtime(vcpu);
-	kvm_vcpu_after_set_cpuid(vcpu);
+	r = kvm_vcpu_after_set_cpuid(vcpu);
+
+	if (!r) {
+		kvfree(orig_e2);
+	} else {
+		kvfree(e2);
+		vcpu->arch.cpuid_entries = orig_e2;
+		vcpu->arch.cpuid_nent = orig_nent;
+		kvm_update_cpuid_runtime(vcpu);
+		kvm_vcpu_after_set_cpuid(vcpu);
+	}
 
 out_free_cpuid:
 	kvfree(e);
@@ -316,8 +335,8 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
 			      struct kvm_cpuid2 *cpuid,
 			      struct kvm_cpuid_entry2 __user *entries)
 {
-	struct kvm_cpuid_entry2 *e2 = NULL;
-	int r;
+	struct kvm_cpuid_entry2 *e2 = NULL, *orig_e2;
+	int r, orig_nent;
 
 	if (cpuid->nent > KVM_MAX_CPUID_ENTRIES)
 		return -E2BIG;
@@ -334,14 +353,26 @@ int kvm_vcpu_ioctl_set_cpuid2(struct kvm_vcpu *vcpu,
 		return r;
 	}
 
-	kvfree(vcpu->arch.cpuid_entries);
+	orig_e2 = vcpu->arch.cpuid_entries;
+	orig_nent = vcpu->arch.cpuid_nent;
+
 	vcpu->arch.cpuid_entries = e2;
 	vcpu->arch.cpuid_nent = cpuid->nent;
 
 	kvm_update_cpuid_runtime(vcpu);
-	kvm_vcpu_after_set_cpuid(vcpu);
+	r = kvm_vcpu_after_set_cpuid(vcpu);
 
-	return 0;
+	if (!r) {
+		kvfree(orig_e2);
+	} else {
+		kvfree(e2);
+		vcpu->arch.cpuid_entries = orig_e2;
+		vcpu->arch.cpuid_nent = orig_nent;
+		kvm_update_cpuid_runtime(vcpu);
+		kvm_vcpu_after_set_cpuid(vcpu);
+	}
+
+	return r;
 }
 
 int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c
index c553bc09d50a..e4d0e6ad2178 100644
--- a/arch/x86/kvm/mmu/page_track.c
+++ b/arch/x86/kvm/mmu/page_track.c
@@ -19,6 +19,55 @@
 #include "mmu.h"
 #include "mmu_internal.h"
 
+int kvm_page_tracking_mmu_enable(struct kvm *kvm, bool enable)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *s;
+	unsigned short **gfn_track, *buf;
+	int i, ret = 0;
+
+	if (IS_ENABLED(CONFIG_KVM_EXTERNAL_WRITE_TRACKING))
+		return 0;
+
+	mutex_lock(&kvm->slots_lock);
+
+	if (enable == kvm->arch.mmu_uses_page_tracking) {
+		mutex_unlock(&kvm->slots_lock);
+		return 0;
+	}
+
+	for (i = 0; enable && i < KVM_ADDRESS_SPACE_NUM && !ret; i++) {
+		slots = __kvm_memslots(kvm, i);
+		kvm_for_each_memslot(s, slots) {
+			gfn_track = s->arch.gfn_track + KVM_PAGE_TRACK_WRITE;
+			*gfn_track = kvcalloc(s->npages, sizeof(*gfn_track),
+					      GFP_KERNEL_ACCOUNT);
+			if (!*gfn_track) {
+				ret = -ENOMEM;
+				break;
+			}
+		}
+	}
+
+	for (i = 0; (!enable || !ret) && i < KVM_ADDRESS_SPACE_NUM; i++) {
+		slots = __kvm_memslots(kvm, i);
+		kvm_for_each_memslot(s, slots) {
+			gfn_track = s->arch.gfn_track + KVM_PAGE_TRACK_WRITE;
+			buf = *gfn_track;
+			*gfn_track = NULL;
+			synchronize_srcu(&kvm->srcu);
+			kvfree(buf);
+		}
+	}
+
+	if (!ret)
+		kvm->arch.mmu_uses_page_tracking = enable;
+
+	mutex_unlock(&kvm->slots_lock);
+
+	return ret;
+}
+
 void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
 {
 	int i;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index bcc8f887d3bd..af904e9f4be9 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3981,7 +3981,7 @@ static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
 	return 0;
 }
 
-static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+static int svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct kvm_cpuid_entry2 *best;
@@ -4047,6 +4047,8 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_EIP, 1, 1);
 		set_msr_interception(vcpu, svm->msrpm, MSR_IA32_SYSENTER_ESP, 1, 1);
 	}
+
+	return 0;
 }
 
 static bool svm_nested_enabled(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 1b21ed01e837..6455759907d1 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7166,7 +7166,7 @@ static void update_intel_pt_cfg(struct kvm_vcpu *vcpu)
 		vmx->pt_desc.ctl_bitmask &= ~(0xfULL << (32 + i * 4));
 }
 
-static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
+static int vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -7223,6 +7223,11 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 
 	/* Refresh #PF interception to account for MAXPHYADDR changes. */
 	vmx_update_exception_bitmap(vcpu);
+
+	if (tdp_enabled)
+		return kvm_page_tracking_mmu_enable(vcpu->kvm,
+						    nested_vmx_allowed(vcpu));
+	return 0;
 }
 
 static bool vmx_nested_enabled(void)
-- 
2.33.0.464.g1972c5931b-goog


  parent reply	other threads:[~2021-09-21  8:10 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-21  8:10 [PATCH 0/3] KVM: x86: skip gfn_track allocation when possible David Stevens
2021-09-21  8:10 ` [PATCH 1/3] KVM: x86: add config for non-kvm users of page tracking David Stevens
2021-09-21  8:10 ` [PATCH 2/3] KVM: x86/mmu: skip page tracking when possible David Stevens
2021-09-21  8:10 ` David Stevens [this message]
2021-09-21 10:27 ` [PATCH 0/3] KVM: x86: skip gfn_track allocation " Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210921081010.457591-4-stevensd@google.com \
    --to=stevensd@chromium.org \
    --cc=jmattson@google.com \
    --cc=joro@8bytes.org \
    --cc=kvm@vger.kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=seanjc@google.com \
    --cc=vkuznets@redhat.com \
    --cc=wanpengli@tencent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.