From: Marcelo Tosatti <mtosatti@redhat.com>
To: Avi Kivity <avi@redhat.com>
Cc: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>, kvm@vger.kernel.org
Subject: Re: [PATCH RFC] KVM: MMU: Don't use RCU for lockless shadow walking
Date: Mon, 23 Apr 2012 22:17:47 -0300 [thread overview]
Message-ID: <20120424011747.GA15748@amt.cnet> (raw)
In-Reply-To: <1335197812-32064-1-git-send-email-avi@redhat.com>
On Mon, Apr 23, 2012 at 07:16:52PM +0300, Avi Kivity wrote:
> Using RCU for lockless shadow walking can increase the amount of memory
> in use by the system, since RCU grace periods are unpredictable. We also
> have an unconditional write to a shared variable (reader_counter), which
> isn't good for scaling.
>
> Replace that with a scheme similar to x86's get_user_pages_fast(): disable
> interrupts during lockless shadow walk to force the freer
> (kvm_mmu_commit_zap_page()) to wait for the TLB flush IPI to find the
> processor with interrupts enabled.
>
> We also add a new vcpu->mode, READING_SHADOW_PAGE_TABLES, to prevent
> kvm_flush_remote_tlbs() from avoiding the IPI.
>
> Signed-off-by: Avi Kivity <avi@redhat.com>
> ---
>
> Turned out to be simpler than expected. However, I think there's a problem
> with make_all_cpus_request() possibly reading an incorrect vcpu->cpu.
>
> arch/x86/include/asm/kvm_host.h | 4 ---
> arch/x86/kvm/mmu.c | 61 +++++++++++----------------------------
> include/linux/kvm_host.h | 3 +-
> 3 files changed, 19 insertions(+), 49 deletions(-)
>
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index f624ca7..67e66e6 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -237,8 +237,6 @@ struct kvm_mmu_page {
> #endif
>
> int write_flooding_count;
> -
> - struct rcu_head rcu;
> };
>
> struct kvm_pio_request {
> @@ -536,8 +534,6 @@ struct kvm_arch {
> u64 hv_guest_os_id;
> u64 hv_hypercall;
>
> - atomic_t reader_counter;
> -
> #ifdef CONFIG_KVM_MMU_AUDIT
> int audit_point;
> #endif
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 07424cf..903af5e 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -551,19 +551,23 @@ static u64 mmu_spte_get_lockless(u64 *sptep)
>
> static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu)
> {
> - rcu_read_lock();
> - atomic_inc(&vcpu->kvm->arch.reader_counter);
> -
> - /* Increase the counter before walking shadow page table */
> - smp_mb__after_atomic_inc();
> + /*
> + * Prevent page table teardown by making any free-er wait during
> + * kvm_flush_remote_tlbs() IPI to all active vcpus.
> + */
> + local_irq_disable();
> + vcpu->mode = READING_SHADOW_PAGE_TABLES;
> + /*
> + * wmb: advertise vcpu->mode change
> + * rmb: make sure we see updated sptes
> + */
> + smp_mb();
> }
>
> static void walk_shadow_page_lockless_end(struct kvm_vcpu *vcpu)
> {
> - /* Decrease the counter after walking shadow page table finished */
> - smp_mb__before_atomic_dec();
> - atomic_dec(&vcpu->kvm->arch.reader_counter);
> - rcu_read_unlock();
> + vcpu->mode = OUTSIDE_GUEST_MODE;
> + local_irq_enable();
> }
>
> static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache,
> @@ -1989,30 +1993,6 @@ static int kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp,
> return ret;
> }
>
> -static void kvm_mmu_isolate_pages(struct list_head *invalid_list)
> -{
> - struct kvm_mmu_page *sp;
> -
> - list_for_each_entry(sp, invalid_list, link)
> - kvm_mmu_isolate_page(sp);
> -}
> -
> -static void free_pages_rcu(struct rcu_head *head)
> -{
> - struct kvm_mmu_page *next, *sp;
> -
> - sp = container_of(head, struct kvm_mmu_page, rcu);
> - while (sp) {
> - if (!list_empty(&sp->link))
> - next = list_first_entry(&sp->link,
> - struct kvm_mmu_page, link);
> - else
> - next = NULL;
> - kvm_mmu_free_page(sp);
> - sp = next;
> - }
> -}
> -
> static void kvm_mmu_commit_zap_page(struct kvm *kvm,
> struct list_head *invalid_list)
> {
> @@ -2021,25 +2001,18 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm,
> if (list_empty(invalid_list))
> return;
>
> + /*
> + * Wait for all vcpus to exit guest mode and/or lockless shadow
> + * page table walks.
> + */
> kvm_flush_remote_tlbs(kvm);
>
> - if (atomic_read(&kvm->arch.reader_counter)) {
> - kvm_mmu_isolate_pages(invalid_list);
> - sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
> - list_del_init(invalid_list);
> -
> - trace_kvm_mmu_delay_free_pages(sp);
> - call_rcu(&sp->rcu, free_pages_rcu);
> - return;
> - }
> -
> do {
> sp = list_first_entry(invalid_list, struct kvm_mmu_page, link);
> WARN_ON(!sp->role.invalid || sp->root_count);
> kvm_mmu_isolate_page(sp);
> kvm_mmu_free_page(sp);
> } while (!list_empty(invalid_list));
> -
> }
>
> /*
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 186ffab..d1f1adf 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -128,7 +128,8 @@ int kvm_async_pf_wakeup_all(struct kvm_vcpu *vcpu);
> enum {
> OUTSIDE_GUEST_MODE,
> IN_GUEST_MODE,
> - EXITING_GUEST_MODE
> + EXITING_GUEST_MODE,
> + READING_SHADOW_PAGE_TABLES,
> };
Should we add an explicit memory barrier after kvm_mmu_prepare_zap_page()?
(Currently we rely on unrelated barriers internal to kvm_flush_remote_tlbs().)
next prev parent reply other threads:[~2012-04-24 1:22 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-04-23 16:16 [PATCH RFC] KVM: MMU: Don't use RCU for lockless shadow walking Avi Kivity
2012-04-24 1:17 ` Marcelo Tosatti [this message]
2012-04-24 9:24 ` Avi Kivity
2012-05-14 12:41 ` Avi Kivity
2012-04-24 6:37 ` Xiao Guangrong
2012-04-24 9:19 ` Avi Kivity
2012-04-24 9:23 ` Avi Kivity
2012-04-24 9:54 ` Xiao Guangrong
2012-04-24 10:02 ` Avi Kivity
2012-04-24 10:05 ` Xiao Guangrong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20120424011747.GA15748@amt.cnet \
--to=mtosatti@redhat.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=xiaoguangrong@linux.vnet.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.