From: Avi Kivity <avi@redhat.com>
To: Marcelo Tosatti <mtosatti@redhat.com>,
Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>,
kvm@vger.kernel.org
Subject: [PATCH v2] KVM: MMU: Don't use RCU for lockless shadow walking
Date: Tue, 24 Apr 2012 12:47:25 +0300 [thread overview]
Message-ID: <1335260845-16271-1-git-send-email-avi@redhat.com> (raw)
Using RCU for lockless shadow walking can increase the amount of memory
in use by the system, since RCU grace periods are unpredictable. We also
have an unconditional write to a shared variable (reader_counter), which
isn't good for scaling.
Replace that with a scheme similar to x86's get_user_pages_fast(): disable
interrupts during lockless shadow walk to force the freer
(kvm_mmu_commit_zap_page()) to wait for the TLB flush IPI to find the
processor with interrupts enabled.
We also add a new vcpu->mode, READING_SHADOW_PAGE_TABLES, to prevent
kvm_flush_remote_tlbs() from avoiding the IPI.
Signed-off-by: Avi Kivity <avi@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 4 ---
arch/x86/kvm/mmu.c | 72 +++++++++++++++------------------------
include/linux/kvm_host.h | 3 +-
3 files changed, 30 insertions(+), 49 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f624ca7..67e66e6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -237,8 +237,6 @@ struct kvm_mmu_page {
#endif
int write_flooding_count;
-
- struct rcu_head rcu;
};
struct kvm_pio_request {
@@ -536,8 +534,6 @@ struct kvm_arch {
u64 hv_guest_os_id;
u64 hv_hypercall;
- atomic_t reader_counter;
-
#ifdef CONFIG_KVM_MMU_AUDIT
int audit_point;
#endif
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 07424cf..ef88034 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -551,19 +551,28 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
{
- rcu_read_lock();
- atomic_inc(&vcpu->kvm->arch.reader_counter);
-
- /* Increase the counter before walking shadow page table */
- smp_mb__after_atomic_inc();
+ /*
+ * Prevent page table teardown by making any free-er wait during
+ * kvm_flush_remote_tlbs() IPI to all active vcpus.
+ */
+ local_irq_disable();
+ vcpu->mode = READING_SHADOW_PAGE_TABLES;
+ /*
+ * wmb: advertise vcpu->mode change
+ * rmb: make sure we see updated sptes
+ */
+ smp_mb();
}
static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
{
- /* Decrease the counter after walking shadow page table finished */
- smp_mb__before_atomic_dec();
- atomic_dec(&vcpu->kvm->arch.reader_counter);
- rcu_read_unlock();
+ /*
+ * Make our reads and writes to shadow page tables globally visible
+ * before leaving READING_SHADOW_PAGE_TABLES mode.
+ */
+ smp_mb();
+ vcpu->mode = OUTSIDE_GUEST_MODE;
+ local_irq_enable();
}
static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
@@ -1989,30 +1998,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
return ret;
}
-static void kvm_mmu_isolate_pages(struct list_head *invalid_list)
-{
- struct kvm_mmu_page *sp;
-
- list_for_each_entry(sp, invalid_list, link)
- kvm_mmu_isolate_page(sp);
-}
-
-static void free_pages_rcu(struct rcu_head *head)
-{
- struct kvm_mmu_page *next, *sp;
-
- sp = container_of(head, struct kvm_mmu_page, rcu);
- while (sp) {
- if (!list_empty(&sp->link))
- next = list_first_entry(&sp->link,
- struct kvm_mmu_page, link);
- else
- next = NULL;
- kvm_mmu_free_page(sp);
- sp = next;
- }
-}
-
static void kvm_mmu_commit_zap_page(struct kvm *kvm,
struct list_head *invalid_list)
{
@@ -2021,17 +2006,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
if (list_empty(invalid_list))
return;
- kvm_flush_remote_tlbs(kvm);
-
- if (atomic_read(&kvm->arch.reader_counter)) {
- kvm_mmu_isolate_pages(invalid_list);
- sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
- list_del_init(invalid_list);
+ /*
+ * wmb: make sure everyone sees our modifications to the page tables
+ * rmb: make sure we see changes to vcpu->mode
+ */
+ smp_mb();
- trace_kvm_mmu_delay_free_pages(sp);
- call_rcu(&sp->rcu, free_pages_rcu);
- return;
- }
+ /*
+ * Wait for all vcpus to exit guest mode and/or lockless shadow
+ * page table walks.
+ */
+ kvm_flush_remote_tlbs(kvm);
do {
sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
@@ -2039,7 +2024,6 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
kvm_mmu_isolate_page(sp);
kvm_mmu_free_page(sp);
} while (!list_empty(invalid_list));
-
}
/*
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 186ffab..d1f1adf 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -128,7 +128,8 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
enum {
OUTSIDE_GUEST_MODE,
IN_GUEST_MODE,
- EXITING_GUEST_MODE
+ EXITING_GUEST_MODE,
+ READING_SHADOW_PAGE_TABLES,
};
/*
--
1.7.10
next reply other threads:[~2012-04-24 9:47 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-04-24 9:47 Avi Kivity [this message]
2012-04-24 10:13 ` [PATCH v2] KVM: MMU: Don't use RCU for lockless shadow walking Xiao Guangrong
2012-04-24 10:42 ` Avi Kivity
2012-04-26 22:00 ` Marcelo Tosatti
2012-04-27 6:07 ` Xiao Guangrong
2012-04-27 21:49 ` Marcelo Tosatti
2012-04-29 9:38 ` Avi Kivity
2012-04-29 9:35 ` Avi Kivity
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1335260845-16271-1-git-send-email-avi@redhat.com \
--to=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=mtosatti@redhat.com \
--cc=xiaoguangrong@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox