All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gleb Natapov <gleb@redhat.com>
To: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Cc: avi.kivity@gmail.com, mtosatti@redhat.com, pbonzini@redhat.com,
	linux-kernel@vger.kernel.org, kvm@vger.kernel.org
Subject: Re: [PATCH v3 13/15] KVM: MMU: locklessly write-protect the page
Date: Thu, 24 Oct 2013 12:17:32 +0300	[thread overview]
Message-ID: <20131024091732.GA5289@redhat.com> (raw)
In-Reply-To: <1382534973-13197-14-git-send-email-xiaoguangrong@linux.vnet.ibm.com>

On Wed, Oct 23, 2013 at 09:29:31PM +0800, Xiao Guangrong wrote:
> Currently, when marking a memslot as dirty-logged or getting the dirty
> pages, we need to write-protect a large amount of guest memory. This is
> heavy work, especially because we need to hold mmu-lock, which is also
> required by vcpus to fix their page table faults and by the mmu-notifier
> when a host page is being changed. In a guest with extreme cpu / memory
> usage, it becomes a scalability issue.
> 
> This patch introduces a way to locklessly write-protect guest memory
> 
> Now, lockless rmap walk, lockless shadow page table access and lockless
> spte write-protection are ready, so it is time to implement page
> write-protection out of mmu-lock
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
> ---
>  arch/x86/include/asm/kvm_host.h |  4 ---
>  arch/x86/kvm/mmu.c              | 59 ++++++++++++++++++++++++++++++-----------
>  arch/x86/kvm/mmu.h              |  6 +++++
>  arch/x86/kvm/x86.c              | 11 ++++----
>  4 files changed, 55 insertions(+), 25 deletions(-)
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index df9ae10..cdb6f29 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -793,10 +793,6 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
>  		u64 dirty_mask, u64 nx_mask, u64 x_mask);
>  
>  void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
> -void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
> -void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
> -				     struct kvm_memory_slot *slot,
> -				     gfn_t gfn_offset, unsigned long mask);
>  void kvm_mmu_zap_all(struct kvm *kvm);
>  void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm);
>  unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm);
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 8b96d96..d82bbec 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -1386,8 +1386,37 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
>  	return flush;
>  }
>  
> -/**
> - * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
> +static void __rmap_write_protect_lockless(u64 *sptep)
> +{
> +	u64 spte;
> +
> +retry:
> +	/*
> +	 * Note we may partly read the sptep on 32bit host, however, we
> +	 * allow this case because:
> +	 * - we do not access the page got from the sptep.
> +	 * - cmpxchg64 can detect that case and avoid setting a wrong value
> +	 *   to the sptep.
> +	 */
> +	spte = *rcu_dereference(sptep);
> +	if (unlikely(!is_last_spte(spte) || !is_writable_pte(spte)))
is_last_spte() takes two parameters.

> +		return;
> +
> +	if (likely(cmpxchg64(sptep, spte, spte & ~PT_WRITABLE_MASK) == spte))
> +		return;
> +
> +	goto retry;
> +}
> +
> +static void rmap_write_protect_lockless(unsigned long *rmapp)
> +{
> +	pte_list_walk_lockless(rmapp, __rmap_write_protect_lockless);
> +}
> +
> +/*
> + * kvm_mmu_write_protect_pt_masked_lockless - write protect selected PT level
> + * pages out of mmu-lock.
> + *
>   * @kvm: kvm instance
>   * @slot: slot to protect
>   * @gfn_offset: start of the BITS_PER_LONG pages we care about
> @@ -1396,16 +1425,17 @@ static bool __rmap_write_protect(struct kvm *kvm, unsigned long *rmapp,
>   * Used when we do not need to care about huge page mappings: e.g. during dirty
>   * logging we do not have any such mappings.
>   */
> -void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
> -				     struct kvm_memory_slot *slot,
> -				     gfn_t gfn_offset, unsigned long mask)
> +void
> +kvm_mmu_write_protect_pt_masked_lockless(struct kvm *kvm,
> +					 struct kvm_memory_slot *slot,
> +					 gfn_t gfn_offset, unsigned long mask)
>  {
>  	unsigned long *rmapp;
>  
>  	while (mask) {
>  		rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
>  				      PT_PAGE_TABLE_LEVEL, slot);
> -		__rmap_write_protect(kvm, rmapp, false);
> +		rmap_write_protect_lockless(rmapp);
>  
>  		/* clear the first set bit */
>  		mask &= mask - 1;
> @@ -4477,7 +4507,7 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
>  	init_kvm_mmu(vcpu);
>  }
>  
> -void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
> +void kvm_mmu_slot_remove_write_access_lockless(struct kvm *kvm, int slot)
>  {
>  	struct kvm_memory_slot *memslot;
>  	gfn_t last_gfn;
> @@ -4486,8 +4516,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
>  	memslot = id_to_memslot(kvm->memslots, slot);
>  	last_gfn = memslot->base_gfn + memslot->npages - 1;
>  
> -	spin_lock(&kvm->mmu_lock);
> -
> +	rcu_read_lock();
>  	for (i = PT_PAGE_TABLE_LEVEL;
>  	     i < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++i) {
>  		unsigned long *rmapp;
> @@ -4497,15 +4526,15 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot)
>  		last_index = gfn_to_index(last_gfn, memslot->base_gfn, i);
>  
>  		for (index = 0; index <= last_index; ++index, ++rmapp) {
> -			if (*rmapp)
> -				__rmap_write_protect(kvm, rmapp, false);
> +			rmap_write_protect_lockless(rmapp);
>  
> -			if (need_resched() || spin_needbreak(&kvm->mmu_lock))
> -				cond_resched_lock(&kvm->mmu_lock);
> +			if (need_resched()) {
> +				rcu_read_lock();
> +				rcu_read_unlock();
> +			}
>  		}
>  	}
> -
> -	spin_unlock(&kvm->mmu_lock);
> +	rcu_read_unlock();
>  
>  	/*
>  	 * We can flush all the TLBs out of the mmu lock without TLB
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index 2926152..33f313b 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -117,4 +117,10 @@ static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
>  }
>  
>  void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
> +
> +void kvm_mmu_slot_remove_write_access_lockless(struct kvm *kvm, int slot);
> +void
> +kvm_mmu_write_protect_pt_masked_lockless(struct kvm *kvm,
> +					 struct kvm_memory_slot *slot,
> +					 gfn_t gfn_offset, unsigned long mask);
>  #endif
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 4ac3a27..c6233e1 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3554,8 +3554,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
>  	dirty_bitmap_buffer = dirty_bitmap + n / sizeof(long);
>  	memset(dirty_bitmap_buffer, 0, n);
>  
> -	spin_lock(&kvm->mmu_lock);
> -
> +	rcu_read_lock();
>  	for (i = 0; i < n / sizeof(long); i++) {
>  		unsigned long mask;
>  		gfn_t offset;
> @@ -3579,10 +3578,10 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log)
>  		dirty_bitmap_buffer[i] = mask;
>  
>  		offset = i * BITS_PER_LONG;
> -		kvm_mmu_write_protect_pt_masked(kvm, memslot, offset, mask);
> +		kvm_mmu_write_protect_pt_masked_lockless(kvm, memslot,
> +							 offset, mask);
>  	}
> -
> -	spin_unlock(&kvm->mmu_lock);
> +	rcu_read_unlock();
>  
>  	/*
>  	 * All the TLBs can be flushed out of mmu lock, see the comments in
> @@ -7246,7 +7245,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
>  	 * See the comments in fast_page_fault().
>  	 */
>  	if ((change != KVM_MR_DELETE) && (mem->flags & KVM_MEM_LOG_DIRTY_PAGES))
> -		kvm_mmu_slot_remove_write_access(kvm, mem->slot);
> +		kvm_mmu_slot_remove_write_access_lockless(kvm, mem->slot);
>  }
>  
>  void kvm_arch_flush_shadow_all(struct kvm *kvm)
> -- 
> 1.8.1.4

--
			Gleb.

  reply	other threads:[~2013-10-24  9:17 UTC|newest]

Thread overview: 69+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-23 13:29 [PATCH v3 00/15] KVM: MMU: locklessly write-protect Xiao Guangrong
2013-10-23 13:29 ` [PATCH v3 01/15] KVM: MMU: properly check last spte in fast_page_fault() Xiao Guangrong
2013-11-12  0:25   ` Marcelo Tosatti
2013-10-23 13:29 ` [PATCH v3 02/15] KVM: MMU: lazily drop large spte Xiao Guangrong
2013-11-12 22:44   ` Marcelo Tosatti
2013-10-23 13:29 ` [PATCH v3 03/15] KVM: MMU: flush tlb if the spte can be locklessly modified Xiao Guangrong
2013-11-13  0:10   ` Marcelo Tosatti
2013-10-23 13:29 ` [PATCH v3 04/15] KVM: MMU: flush tlb out of mmu lock when write-protect the sptes Xiao Guangrong
2013-11-14  0:36   ` Marcelo Tosatti
2013-11-14  5:15     ` Xiao Guangrong
2013-11-14 18:39       ` Marcelo Tosatti
2013-11-15  7:09         ` Xiao Guangrong
2013-11-19  0:19           ` Marcelo Tosatti
2013-10-23 13:29 ` [PATCH v3 05/15] KVM: MMU: update spte and add it into rmap before dirty log Xiao Guangrong
2013-11-15  0:08   ` Marcelo Tosatti
2013-10-23 13:29 ` [PATCH v3 06/15] KVM: MMU: redesign the algorithm of pte_list Xiao Guangrong
2013-11-19  0:48   ` Marcelo Tosatti
2013-10-23 13:29 ` [PATCH v3 07/15] KVM: MMU: introduce nulls desc Xiao Guangrong
2013-11-22 19:14   ` Marcelo Tosatti
2013-11-25  6:11     ` Xiao Guangrong
2013-11-25  6:29       ` Xiao Guangrong
2013-11-25 18:12         ` Marcelo Tosatti
2013-11-26  3:21           ` Xiao Guangrong
2013-11-26 10:12             ` Gleb Natapov
2013-11-26 19:31             ` Marcelo Tosatti
2013-11-28  8:53               ` Xiao Guangrong
2013-12-03  7:10                 ` Xiao Guangrong
2013-12-05 13:50                   ` Marcelo Tosatti
2013-12-05 15:30                     ` Xiao Guangrong
2013-12-06  0:15                       ` Marcelo Tosatti
2013-12-06  0:22                       ` Marcelo Tosatti
2013-12-10  6:58                         ` Xiao Guangrong
2013-11-25 10:19       ` Gleb Natapov
2013-11-25 10:25         ` Xiao Guangrong
2013-11-25 12:48       ` Avi Kivity
2013-11-25 14:23         ` Marcelo Tosatti
2013-11-25 14:29           ` Gleb Natapov
2013-11-25 18:06             ` Marcelo Tosatti
2013-11-26  3:10           ` Xiao Guangrong
2013-11-26 10:15             ` Gleb Natapov
2013-11-26 19:58             ` Marcelo Tosatti
2013-11-28  8:32               ` Xiao Guangrong
2013-11-25 14:08       ` Marcelo Tosatti
2013-11-26  3:02         ` Xiao Guangrong
2013-11-25  9:31     ` Peter Zijlstra
2013-11-25 10:59       ` Xiao Guangrong
2013-11-25 11:05         ` Peter Zijlstra
2013-11-25 11:29           ` Peter Zijlstra
2013-10-23 13:29 ` [PATCH v3 08/15] KVM: MMU: introduce pte-list lockless walker Xiao Guangrong
2013-10-23 13:29 ` [PATCH v3 09/15] KVM: MMU: initialize the pointers in pte_list_desc properly Xiao Guangrong
2013-10-23 13:29 ` [PATCH v3 10/15] KVM: MMU: allocate shadow pages from slab Xiao Guangrong
2013-10-24  9:19   ` Gleb Natapov
2013-10-24  9:29     ` Xiao Guangrong
2013-10-24  9:52       ` Gleb Natapov
2013-10-24 10:10         ` Xiao Guangrong
2013-10-24 10:39           ` Gleb Natapov
2013-10-24 11:01             ` Xiao Guangrong
2013-10-24 12:32               ` Gleb Natapov
2013-10-28  3:16                 ` Xiao Guangrong
2013-10-23 13:29 ` [PATCH v3 11/15] KVM: MMU: locklessly access shadow page under rcu protection Xiao Guangrong
2013-10-23 13:29 ` [PATCH v3 12/15] KVM: MMU: check last spte with unawareness of mapping level Xiao Guangrong
2013-10-23 13:29 ` [PATCH v3 13/15] KVM: MMU: locklessly write-protect the page Xiao Guangrong
2013-10-24  9:17   ` Gleb Natapov [this message]
2013-10-24  9:24     ` Xiao Guangrong
2013-10-24  9:32       ` Gleb Natapov
2013-10-23 13:29 ` [PATCH v3 14/15] KVM: MMU: clean up spte_write_protect Xiao Guangrong
2013-10-23 13:29 ` [PATCH v3 15/15] KVM: MMU: use rcu functions to access the pointer Xiao Guangrong
2013-11-03 12:29 ` [PATCH v3 00/15] KVM: MMU: locklessly write-protect Gleb Natapov
2013-11-11  5:33   ` Xiao Guangrong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131024091732.GA5289@redhat.com \
    --to=gleb@redhat.com \
    --cc=avi.kivity@gmail.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=xiaoguangrong@linux.vnet.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.