public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Gleb Natapov <gleb@redhat.com>
To: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: mtosatti@redhat.com, avi.kivity@gmail.com,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Subject: Re: [PATCH v3 08/15] KVM: MMU: allow unmap invalid rmap out of mmu-lock
Date: Thu, 18 Apr 2013 14:00:36 +0300	[thread overview]
Message-ID: <20130418110036.GS8997@redhat.com> (raw)
In-Reply-To: <1366093973-2617-9-git-send-email-xiaoguangrong@linux.vnet.ibm.com>

On Tue, Apr 16, 2013 at 02:32:46PM +0800, Xiao Guangrong wrote:
> pte_list_clear_concurrently allows us to reset pte-desc entry
> out of mmu-lock. We can reset spte out of mmu-lock if we can protect the
> lifecycle of sp, we use this way to achieve the goal:
> 
> unmap_memslot_rmap_nolock():
> for-each-rmap-in-slot:
>       preempt_disable
>       kvm->arch.being_unmapped_rmap = rmapp
>       clear spte and reset rmap entry
>       kvm->arch.being_unmapped_rmap = NULL
>       preempt_enable
> 
> Other patch like zap-sp and mmu-notify which are protected
> by mmu-lock:
>       clear spte and reset rmap entry
> retry:
>       if (kvm->arch.being_unmapped_rmap == rmap)
> 		goto retry
> (the wait is very rare and clear one rmap is very fast, it
> is not bad even if wait is needed)
> 
I do not understand what how this achieve the goal. Suppose that rmap
== X and kvm->arch.being_unmapped_rmap == NULL so "goto retry" is skipped,
but moment later unmap_memslot_rmap_nolock() does
vm->arch.being_unmapped_rmap = X.
 
> Then, we can sure the spte is always available when we do
> unmap_memslot_rmap_nolock
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
> ---
>  arch/x86/include/asm/kvm_host.h |    2 +
>  arch/x86/kvm/mmu.c              |  114 ++++++++++++++++++++++++++++++++++++---
>  arch/x86/kvm/mmu.h              |    2 +-
>  3 files changed, 110 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 5fd6ed1..1ad9a34 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -536,6 +536,8 @@ struct kvm_arch {
>  	 * Hash table of struct kvm_mmu_page.
>  	 */
>  	struct list_head active_mmu_pages;
> +	unsigned long *being_unmapped_rmap;
> +
>  	struct list_head assigned_dev_head;
>  	struct iommu_domain *iommu_domain;
>  	int iommu_flags;
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 2a7a5d0..e6414d2 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1104,10 +1104,10 @@ static int slot_rmap_add(struct kvm_memory_slot *slot,
>  	return slot->arch.ops->rmap_add(vcpu, spte, rmapp);
>  }
>  
> -static void slot_rmap_remove(struct kvm_memory_slot *slot,
> +static void slot_rmap_remove(struct kvm_memory_slot *slot, struct kvm *kvm,
>  			     unsigned long *rmapp, u64 *spte)
>  {
> -	slot->arch.ops->rmap_remove(spte, rmapp);
> +	slot->arch.ops->rmap_remove(kvm, spte, rmapp);
>  }
>  
>  static int rmap_add(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
> @@ -1132,7 +1132,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
>  	sp = page_header(__pa(spte));
>  	gfn = kvm_mmu_page_get_gfn(sp, spte - sp->spt);
>  	rmapp = gfn_to_rmap(kvm, &slot, gfn, sp->role.level);
> -	slot_rmap_remove(slot, rmapp, spte);
> +	slot_rmap_remove(slot, kvm, rmapp, spte);
>  }
>  
>  /*
> @@ -1589,9 +1589,14 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
>  	return kvm_handle_hva(kvm, hva, 0, slot_rmap_test_age);
>  }
>  
> +static void rmap_remove_spte(struct kvm *kvm, u64 *spte, unsigned long *rmapp)
> +{
> +	pte_list_remove(spte, rmapp);
> +}
> +
>  static struct rmap_operations normal_rmap_ops = {
>  	.rmap_add = pte_list_add,
> -	.rmap_remove = pte_list_remove,
> +	.rmap_remove = rmap_remove_spte,
>  
>  	.rmap_write_protect = __rmap_write_protect,
>  
> @@ -1613,9 +1618,27 @@ static int invalid_rmap_add(struct kvm_vcpu *vcpu, u64 *spte,
>  	return 0;
>  }
>  
> -static void invalid_rmap_remove(u64 *spte, unsigned long *rmapp)
> +static void sync_being_unmapped_rmap(struct kvm *kvm, unsigned long *rmapp)
> +{
> +	/*
> +	 * Ensure all the sptes on the rmap have been zapped and
> +	 * the rmap's entries have been reset so that
> +	 * unmap_invalid_rmap_nolock can not get any spte from the
> +	 * rmap after calling sync_being_unmapped_rmap().
> +	 */
> +	smp_mb();
> +retry:
> +	if (unlikely(ACCESS_ONCE(kvm->arch.being_unmapped_rmap) == rmapp)) {
> +		cpu_relax();
> +		goto retry;
> +	}
> +}
> +
> +static void
> +invalid_rmap_remove(struct kvm *kvm, u64 *spte, unsigned long *rmapp)
>  {
>  	pte_list_clear_concurrently(spte, rmapp);
> +	sync_being_unmapped_rmap(kvm, rmapp);
>  }
>  
>  static bool invalid_rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
> @@ -1635,7 +1658,11 @@ static int __kvm_unmap_invalid_rmapp(unsigned long *rmapp)
>  		if (sptep == PTE_LIST_SPTE_SKIP)
>  			continue;
>  
> -		/* Do not call .rmap_remove(). */
> +		/*
> +		 * Do not call .rmap_remove() since we do not want to wait
> +		 * on sync_being_unmapped_rmap() when all sptes should be
> +		 * removed from the rmap.
> +		 */
>  		if (mmu_spte_clear_track_bits(sptep))
>  			pte_list_clear_concurrently(sptep, rmapp);
>  	}
> @@ -1645,7 +1672,10 @@ static int __kvm_unmap_invalid_rmapp(unsigned long *rmapp)
>  
>  static int kvm_unmap_invalid_rmapp(struct kvm *kvm, unsigned long *rmapp)
>  {
> -	return __kvm_unmap_invalid_rmapp(rmapp);
> +	int ret = __kvm_unmap_invalid_rmapp(rmapp);
> +
> +	sync_being_unmapped_rmap(kvm, rmapp);
> +	return ret;
>  }
>  
>  static int invalid_rmap_set_pte(struct kvm *kvm, unsigned long *rmapp,
> @@ -1686,6 +1716,76 @@ static struct rmap_operations invalid_rmap_ops = {
>  	.rmap_unmap = kvm_unmap_invalid_rmapp
>  };
>  
> +typedef void (*handle_rmap_fun)(unsigned long *rmapp, void *data);
> +static void walk_memslot_rmap_nolock(struct kvm_memory_slot *slot,
> +				     handle_rmap_fun fun, void *data)
> +{
> +	int level;
> +
> +	for (level = PT_PAGE_TABLE_LEVEL;
> +	      level < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++level) {
> +		unsigned long idx, *rmapp;
> +
> +		rmapp = slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL];
> +		idx = gfn_to_index(slot->base_gfn + slot->npages - 1,
> +				   slot->base_gfn, level) + 1;
> +		/*
> +		 * Walk ramp from the high index to low index to reduce
> +		 * possible wait in sync_being_unmapped_rmap().
> +		 */
> +		while (idx--)
> +			fun(rmapp + idx, data);
> +	}
> +}
> +
> +static void unmap_rmap_no_lock_begin(struct kvm *kvm, unsigned long *rmapp)
> +{
> +	preempt_disable();
> +	kvm->arch.being_unmapped_rmap = rmapp;
> +
> +	/*
> +	 * Set being_unmapped_rmap should be before read/write any
> +	 * sptes on the rmaps.
> +	 * See the comment in sync_being_unmapped_rmap().
> +	 */
> +	smp_mb();
> +}
> +
> +static void unmap_rmap_no_lock_end(struct kvm *kvm)
> +{
> +	/*
> +	 * Ensure clearing spte and resetting rmap's entries has
> +	 * been finished.
> +	 * See the comment in sync_being_unmapped_rmap().
> +	 */
> +	smp_mb();
> +	kvm->arch.being_unmapped_rmap = NULL;
> +	preempt_enable();
> +}
> +
> +static void unmap_invalid_rmap_nolock(unsigned long *rmapp, void *data)
> +{
> +	struct kvm *kvm = (struct kvm *)data;
> +
> +	if (!ACCESS_ONCE(*rmapp))
> +		return;
> +
> +	unmap_rmap_no_lock_begin(kvm, rmapp);
> +	__kvm_unmap_invalid_rmapp(rmapp);
> +	unmap_rmap_no_lock_end(kvm);
> +}
> +
> +static void
> +unmap_memslot_rmap_nolock(struct kvm *kvm, struct kvm_memory_slot *slot)
> +{
> +	/* Only invalid rmaps can be unmapped out of mmu-lock. */
> +	WARN_ON(slot->arch.ops != &invalid_rmap_ops);
> +	/* Use slots_lock to protect kvm->arch.being_unmapped_rmap. */
> +	WARN_ON(!mutex_is_locked(&kvm->slots_lock));
> +
> +	walk_memslot_rmap_nolock(slot, unmap_invalid_rmap_nolock, kvm);
> +}
> +
>  #ifdef MMU_DEBUG
>  static int is_empty_shadow_page(u64 *spt)
>  {
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index bb2b22e..d6aa31a 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -117,7 +117,7 @@ static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
>  struct rmap_operations {
>  	int (*rmap_add)(struct kvm_vcpu *vcpu, u64 *spte,
>  			unsigned long *rmap);
> -	void (*rmap_remove)(u64 *spte, unsigned long *rmap);
> +	void (*rmap_remove)(struct kvm *kvm, u64 *spte, unsigned long *rmap);
>  
>  	bool (*rmap_write_protect)(struct kvm *kvm, unsigned long *rmap,
>  				   bool pt_protect);
> -- 
> 1.7.7.6

--
			Gleb.

  reply	other threads:[~2013-04-18 11:00 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-04-16  6:32 [PATCH v3 00/15] KVM: MMU: fast zap all shadow pages Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 01/15] KVM: x86: clean up and optimize for kvm_arch_free_memslot Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 02/15] KVM: fold kvm_arch_create_memslot into kvm_arch_prepare_memory_region Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 03/15] KVM: x86: do not reuse rmap when memslot is moved Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 04/15] KVM: MMU: abstract memslot rmap related operations Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 05/15] KVM: MMU: allow per-rmap operations Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 06/15] KVM: MMU: allow concurrently clearing spte on remove-only pte-list Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 07/15] KVM: MMU: introduce invalid rmap handlers Xiao Guangrong
2013-04-17 23:38   ` Marcelo Tosatti
2013-04-18  3:15     ` Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 08/15] KVM: MMU: allow unmap invalid rmap out of mmu-lock Xiao Guangrong
2013-04-18 11:00   ` Gleb Natapov [this message]
2013-04-18 11:22     ` Xiao Guangrong
2013-04-18 11:38       ` Gleb Natapov
2013-04-18 12:10         ` Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 09/15] KVM: MMU: introduce free_meslot_rmap_desc_nolock Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 10/15] KVM: x86: introduce memslot_set_lpage_disallowed Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 11/15] KVM: MMU: introduce kvm_clear_all_lpage_info Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 12/15] KVM: MMU: fast invalid all shadow pages Xiao Guangrong
2013-04-18  0:05   ` Marcelo Tosatti
2013-04-18  4:00     ` Xiao Guangrong
2013-04-18 13:03       ` Marcelo Tosatti
2013-04-18 13:29         ` Marcelo Tosatti
2013-04-18 15:20           ` Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 13/15] KVM: x86: use the fast way to invalid all pages Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 14/15] KVM: move srcu_read_lock/srcu_read_unlock to arch-specified code Xiao Guangrong
2013-04-16  6:32 ` [PATCH v3 15/15] KVM: MMU: replace kvm_zap_all with kvm_mmu_invalid_all_pages Xiao Guangrong
2013-04-18  0:08   ` Marcelo Tosatti
2013-04-18  4:03     ` Xiao Guangrong
2013-04-20 17:18       ` Marcelo Tosatti
2013-04-21  6:59         ` Xiao Guangrong
2013-04-21 13:03 ` [PATCH v3 00/15] KVM: MMU: fast zap all shadow pages Gleb Natapov
2013-04-21 14:09   ` Xiao Guangrong
2013-04-21 15:24     ` Marcelo Tosatti
2013-04-22  2:50       ` Xiao Guangrong
2013-04-22  9:21     ` Gleb Natapov
2013-04-23  0:19       ` Xiao Guangrong
2013-04-23  6:28         ` Gleb Natapov
2013-04-23  7:20           ` Xiao Guangrong
2013-04-23  7:33             ` Gleb Natapov
2013-04-21 15:27   ` Marcelo Tosatti
2013-04-21 15:35     ` Marcelo Tosatti
2013-04-22 12:39       ` Gleb Natapov
2013-04-22 13:45         ` Takuya Yoshikawa
2013-04-22 23:02           ` Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20130418110036.GS8997@redhat.com \
    --to=gleb@redhat.com \
    --cc=avi.kivity@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=xiaoguangrong@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox