From: Lai Jiangshan <jiangshanlai@gmail.com>
To: linux-kernel@vger.kernel.org
Cc: Paolo Bonzini <pbonzini@redhat.com>,
Lai Jiangshan <laijs@linux.alibaba.com>,
Sean Christopherson <seanjc@google.com>,
Vitaly Kuznetsov <vkuznets@redhat.com>,
Wanpeng Li <wanpengli@tencent.com>,
Jim Mattson <jmattson@google.com>, Joerg Roedel <joro@8bytes.org>,
Thomas Gleixner <tglx@linutronix.de>,
Ingo Molnar <mingo@redhat.com>, Borislav Petkov <bp@alien8.de>,
x86@kernel.org, "H. Peter Anvin" <hpa@zytor.com>,
Marcelo Tosatti <mtosatti@redhat.com>,
Avi Kivity <avi@redhat.com>,
kvm@vger.kernel.org
Subject: [PATCH 2/7] KVM: X86: Synchronize the shadow pagetable before link it
Date: Tue, 24 Aug 2021 15:55:18 +0800 [thread overview]
Message-ID: <20210824075524.3354-3-jiangshanlai@gmail.com> (raw)
In-Reply-To: <20210824075524.3354-1-jiangshanlai@gmail.com>
From: Lai Jiangshan <laijs@linux.alibaba.com>
If a gpte is changed from non-present to present, the guest doesn't need
to flush the TLB per the SDM. So the host must synchronize the sp before
linking it. Otherwise the guest might use a wrong mapping.
For example: the guest first changes a level-1 pagetable, and then
links its parent to a new place where the original gpte is non-present.
Finally the guest can access the remapped area without flushing
the TLB. This guest behavior is allowed per the SDM, but the host
KVM MMU handles it incorrectly.
Fixes: 4731d4c7a077 ("KVM: MMU: out of sync shadow core")
Signed-off-by: Lai Jiangshan <laijs@linux.alibaba.com>
---
arch/x86/kvm/mmu/mmu.c | 21 ++++++++++++++-------
arch/x86/kvm/mmu/paging_tmpl.h | 28 +++++++++++++++++++++++++---
2 files changed, 39 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 313918df1a10..987953a901d2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2032,8 +2032,9 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
} while (!sp->unsync_children);
}
-static void mmu_sync_children(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *parent)
+static bool mmu_sync_children(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *parent,
+ bool root)
{
int i;
struct kvm_mmu_page *sp;
@@ -2061,11 +2062,20 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
if (need_resched() || rwlock_needbreak(&vcpu->kvm->mmu_lock)) {
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
cond_resched_rwlock_write(&vcpu->kvm->mmu_lock);
+ /*
+ * If @parent is not root, the caller doesn't have
+ * any reference to it, so we cannot access
+ * @parent and continue synchronizing once the
+ * mmu_lock has been released.
+ */
+ if (!root)
+ return false;
flush = false;
}
}
kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush);
+ return true;
}
static void __clear_sp_write_flooding_count(struct kvm_mmu_page *sp)
@@ -2151,9 +2161,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
}
- if (sp->unsync_children)
- kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
-
__clear_sp_write_flooding_count(sp);
trace_get_page:
@@ -3650,7 +3657,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
write_lock(&vcpu->kvm->mmu_lock);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
- mmu_sync_children(vcpu, sp);
+ mmu_sync_children(vcpu, sp, true);
kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
write_unlock(&vcpu->kvm->mmu_lock);
@@ -3666,7 +3673,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
if (IS_VALID_PAE_ROOT(root)) {
root &= PT64_BASE_ADDR_MASK;
sp = to_shadow_page(root);
- mmu_sync_children(vcpu, sp);
+ mmu_sync_children(vcpu, sp, true);
}
}
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 50ade6450ace..48c7fe1b2d50 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -664,7 +664,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
* emulate this operation, return 1 to indicate this case.
*/
static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
- struct guest_walker *gw)
+ struct guest_walker *gw, unsigned long mmu_seq)
{
struct kvm_mmu_page *sp = NULL;
struct kvm_shadow_walk_iterator it;
@@ -678,6 +678,8 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
top_level = vcpu->arch.mmu->root_level;
if (top_level == PT32E_ROOT_LEVEL)
top_level = PT32_ROOT_LEVEL;
+
+again:
/*
* Verify that the top-level gpte is still there. Since the page
* is a root page, it is either write protected (and cannot be
@@ -713,8 +715,28 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
if (FNAME(gpte_changed)(vcpu, gw, it.level - 1))
goto out_gpte_changed;
- if (sp)
+ if (sp) {
+ /*
+ * We must synchronize the pagetable before linking it
+ * because the guest doesn't need to flush the TLB when
+ * gpte is changed from non-present to present.
+ * Otherwise, the guest may use the wrong mapping.
+ *
+ * For PG_LEVEL_4K, kvm_mmu_get_page() has already
+ * synchronized it transiently via kvm_sync_page().
+ *
+ * For higher level pagetable, we synchronize it
+ * via slower mmu_sync_children(). If it once
+ * released the mmu_lock, we need to restart from
+ * the root since we don't have reference to @sp.
+ */
+ if (sp->unsync_children && !mmu_sync_children(vcpu, sp, false)) {
+ if (mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva))
+ goto out_gpte_changed;
+ goto again;
+ }
link_shadow_page(vcpu, it.sptep, sp);
+ }
}
kvm_mmu_hugepage_adjust(vcpu, fault);
@@ -905,7 +927,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
r = make_mmu_pages_available(vcpu);
if (r)
goto out_unlock;
- r = FNAME(fetch)(vcpu, fault, &walker);
+ r = FNAME(fetch)(vcpu, fault, &walker, mmu_seq);
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
out_unlock:
--
2.19.1.6.gb485710b
next prev parent reply other threads:[~2021-08-24 17:59 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-24 7:55 [PATCH 0/7] KVM: X86: MMU: misc fixes and cleanups Lai Jiangshan
2021-08-24 7:55 ` [PATCH 1/7] KVM: X86: Fix missed remote tlb flush in rmap_write_protect() Lai Jiangshan
2021-09-02 21:38 ` Sean Christopherson
2021-09-13 9:57 ` Maxim Levitsky
2021-08-24 7:55 ` Lai Jiangshan [this message]
2021-09-02 23:40 ` [PATCH 2/7] KVM: X86: Synchronize the shadow pagetable before link it Sean Christopherson
2021-09-02 23:54 ` Sean Christopherson
2021-09-03 0:44 ` Lai Jiangshan
2021-09-03 16:06 ` Sean Christopherson
2021-09-03 16:25 ` Lai Jiangshan
2021-09-03 16:40 ` Sean Christopherson
2021-09-03 17:00 ` Lai Jiangshan
2021-09-03 16:33 ` Lai Jiangshan
2021-09-03 0:51 ` Lai Jiangshan
2021-09-13 11:30 ` Maxim Levitsky
2021-09-13 20:49 ` Sean Christopherson
2021-09-13 22:31 ` Maxim Levitsky
2021-08-24 7:55 ` [PATCH 3/7] KVM: X86: Zap the invalid list after remote tlb flushing Lai Jiangshan
2021-09-02 21:54 ` Sean Christopherson
2021-08-24 7:55 ` [PATCH 4/7] KVM: X86: Remove FNAME(update_pte) Lai Jiangshan
2021-09-13 9:49 ` Maxim Levitsky
2021-08-24 7:55 ` [PATCH 5/7] KVM: X86: Don't unsync pagetables when speculative Lai Jiangshan
2021-09-13 11:02 ` Maxim Levitsky
2021-09-18 3:06 ` Lai Jiangshan
2021-08-24 7:55 ` [PATCH 6/7] KVM: X86: Don't check unsync if the original spte is writible Lai Jiangshan
2021-08-24 7:55 ` [PATCH 7/7] KVM: X86: Also prefetch the last range in __direct_pte_prefetch() Lai Jiangshan
2021-08-25 15:18 ` Sean Christopherson
2021-08-25 22:58 ` Lai Jiangshan
2021-08-31 18:02 ` [PATCH 0/7] KVM: X86: MMU: misc fixes and cleanups Lai Jiangshan
2021-08-31 21:57 ` Sean Christopherson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210824075524.3354-3-jiangshanlai@gmail.com \
--to=jiangshanlai@gmail.com \
--cc=avi@redhat.com \
--cc=bp@alien8.de \
--cc=hpa@zytor.com \
--cc=jmattson@google.com \
--cc=joro@8bytes.org \
--cc=kvm@vger.kernel.org \
--cc=laijs@linux.alibaba.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=mtosatti@redhat.com \
--cc=pbonzini@redhat.com \
--cc=seanjc@google.com \
--cc=tglx@linutronix.de \
--cc=vkuznets@redhat.com \
--cc=wanpengli@tencent.com \
--cc=x86@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).