From: Sean Christopherson <seanjc@google.com>
To: Paolo Bonzini <pbonzini@redhat.com>,
Christian Borntraeger <borntraeger@linux.ibm.com>,
Janosch Frank <frankja@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>,
Claudio Imbrenda <imbrenda@linux.ibm.com>,
Sean Christopherson <seanjc@google.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
Wanpeng Li <wanpengli@tencent.com>,
Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
kvm@vger.kernel.org, linux-kernel@vger.kernel.org,
Maxim Levitsky <mlevitsk@redhat.com>,
Ben Gardon <bgardon@google.com>,
Lai Jiangshan <jiangshanlai@gmail.com>
Subject: [PATCH 4/7] KVM: x86/mmu: Zap only obsolete roots if a root shadow page is zapped
Date: Thu, 9 Dec 2021 06:05:49 +0000 [thread overview]
Message-ID: <20211209060552.2956723-5-seanjc@google.com> (raw)
In-Reply-To: <20211209060552.2956723-1-seanjc@google.com>
Zap only obsolete roots when responding to zapping a single root shadow
page. Because KVM keeps root_count elevated when stuffing a previous
root into its PGD cache, shadowing a 64-bit guest means that zapping any
root causes all vCPUs to reload all roots, even if their current root is
not affected by the zap.
For many kernels, zapping a single root is a frequent operation, e.g. in
Linux it happens whenever an mm is dropped, e.g. process exits, etc...
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/include/asm/kvm_host.h | 2 +
arch/x86/kvm/mmu.h | 1 +
arch/x86/kvm/mmu/mmu.c | 66 +++++++++++++++++++++++++++++----
arch/x86/kvm/x86.c | 4 +-
4 files changed, 64 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d5fede05eb5f..62e5e842b692 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -102,6 +102,8 @@
#define KVM_REQ_MSR_FILTER_CHANGED KVM_ARCH_REQ(29)
#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
+ KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define CR0_RESERVED_BITS \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e9fbb2c8bbe2..923e0e95e7d7 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -79,6 +79,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
+void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_prev_roots(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 31605cd3c09f..b6115d8ea696 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2345,7 +2345,7 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
struct list_head *invalid_list,
int *nr_zapped)
{
- bool list_unstable;
+ bool list_unstable, zapped_root = false;
trace_kvm_mmu_prepare_zap_page(sp);
++kvm->stat.mmu_shadow_zapped;
@@ -2387,14 +2387,20 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
* in kvm_mmu_zap_all_fast(). Note, is_obsolete_sp() also
* treats invalid shadow pages as being obsolete.
*/
- if (!is_obsolete_sp(kvm, sp))
- kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+ zapped_root = !is_obsolete_sp(kvm, sp);
}
if (sp->lpage_disallowed)
unaccount_huge_nx_page(kvm, sp);
sp->role.invalid = 1;
+
+ /*
+ * Make the request to free obsolete roots after marking the root
+ * invalid, otherwise other vCPUs may not see it as invalid.
+ */
+ if (zapped_root)
+ kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);
return list_unstable;
}
@@ -3985,7 +3991,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
* previous root, then __kvm_mmu_prepare_zap_page() signals all vCPUs
* to reload even if no vCPU is actively using the root.
*/
- if (!sp && kvm_test_request(KVM_REQ_MMU_RELOAD, vcpu))
+ if (!sp && kvm_test_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
return true;
return fault->slot &&
@@ -4187,8 +4193,8 @@ static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd,
/*
* It's possible that the cached previous root page is obsolete because
* of a change in the MMU generation number. However, changing the
- * generation number is accompanied by KVM_REQ_MMU_RELOAD, which will
- * free the root set here and allocate a new one.
+ * generation number is accompanied by KVM_REQ_MMU_FREE_OBSOLETE_ROOTS,
+ * which will free the root set here and allocate a new one.
*/
kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu);
@@ -5113,6 +5119,52 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu)
WARN_ON(VALID_PAGE(vcpu->arch.guest_mmu.root_hpa));
}
+static bool is_obsolete_root(struct kvm_vcpu *vcpu, hpa_t root_hpa)
+{
+ struct kvm_mmu_page *sp;
+
+ if (!VALID_PAGE(root_hpa))
+ return false;
+
+ /*
+ * When freeing obsolete roots, treat roots as obsolete if they don't
+ * have an associated shadow page. This does mean KVM will get false
+ * positives and free roots that don't strictly need to be freed, but
+ * such false positives are relatively rare:
+ *
+ * (a) only PAE paging and nested NPT has roots without shadow pages
+ * (b) remote reloads due to a memslot update obsoletes _all_ roots
+ * (c) KVM doesn't track previous roots for PAE paging, and the guest
+ * is unlikely to zap an in-use PGD.
+ */
+ sp = to_shadow_page(root_hpa);
+ return !sp || is_obsolete_sp(vcpu->kvm, sp);
+}
+
+static void __kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu,
+ struct kvm_mmu *mmu)
+{
+ unsigned long roots_to_free = 0;
+ int i;
+
+ if (is_obsolete_root(vcpu, mmu->root_hpa))
+ roots_to_free |= KVM_MMU_ROOT_CURRENT;
+
+ for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
+ if (is_obsolete_root(vcpu, mmu->root_hpa))
+ roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
+ }
+
+ if (roots_to_free)
+ kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
+}
+
+void kvm_mmu_free_obsolete_roots(struct kvm_vcpu *vcpu)
+{
+ __kvm_mmu_free_obsolete_roots(vcpu, &vcpu->arch.root_mmu);
+ __kvm_mmu_free_obsolete_roots(vcpu, &vcpu->arch.guest_mmu);
+}
+
static bool need_remote_flush(u64 old, u64 new)
{
if (!is_shadow_present_pte(old))
@@ -5686,7 +5738,7 @@ static void kvm_mmu_zap_all_fast(struct kvm *kvm)
* Note: we need to do this under the protection of mmu_lock,
* otherwise, vcpu would purge shadow page but miss tlb flush.
*/
- kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+ kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_FREE_OBSOLETE_ROOTS);
kvm_zap_obsolete_pages(kvm);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ca1f0350a868..35a4f07b4f40 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9714,8 +9714,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
goto out;
}
}
- if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu))
- kvm_mmu_unload(vcpu);
+ if (kvm_check_request(KVM_REQ_MMU_FREE_OBSOLETE_ROOTS, vcpu))
+ kvm_mmu_free_obsolete_roots(vcpu);
if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu))
__kvm_migrate_timers(vcpu);
if (kvm_check_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu))
--
2.34.1.400.ga245620fadb-goog
next prev parent reply other threads:[~2021-12-09 6:06 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-12-09 6:05 [PATCH 0/7] KVM: x86/mmu: Obsolete root shadow page fix Sean Christopherson
2021-12-09 6:05 ` [PATCH 1/7] KVM: x86: Retry page fault if MMU reload is pending and root has no sp Sean Christopherson
2021-12-09 11:19 ` Paolo Bonzini
2021-12-10 12:41 ` Paolo Bonzini
2021-12-10 16:01 ` Sean Christopherson
2021-12-10 16:13 ` Paolo Bonzini
2021-12-10 17:15 ` Sean Christopherson
2021-12-15 18:53 ` Sean Christopherson
2021-12-19 18:41 ` Paolo Bonzini
2021-12-09 6:05 ` [PATCH 2/7] KVM: x86: Invoke kvm_mmu_unload() directly on CR4.PCIDE change Sean Christopherson
2021-12-09 6:05 ` [PATCH 3/7] KVM: Drop kvm_reload_remote_mmus(), open code request in x86 users Sean Christopherson
2021-12-09 6:05 ` Sean Christopherson [this message]
2021-12-09 6:05 ` [PATCH 5/7] KVM: s390: Replace KVM_REQ_MMU_RELOAD usage with arch specific request Sean Christopherson
2021-12-09 9:14 ` Claudio Imbrenda
2021-12-09 10:52 ` Janosch Frank
2021-12-09 6:05 ` [PATCH 6/7] KVM: Drop KVM_REQ_MMU_RELOAD and update vcpu-requests.rst documentation Sean Christopherson
2021-12-09 8:17 ` Claudio Imbrenda
2021-12-09 6:05 ` [PATCH 7/7] KVM: WARN if is_unsync_root() is called on a root without a shadow page Sean Christopherson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211209060552.2956723-5-seanjc@google.com \
--to=seanjc@google.com \
--cc=bgardon@google.com \
--cc=borntraeger@linux.ibm.com \
--cc=david@redhat.com \
--cc=frankja@linux.ibm.com \
--cc=imbrenda@linux.ibm.com \
--cc=jiangshanlai@gmail.com \
--cc=jmattson@google.com \
--cc=joro@8bytes.org \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mlevitsk@redhat.com \
--cc=pbonzini@redhat.com \
--cc=vkuznets@redhat.com \
--cc=wanpengli@tencent.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox