Re: [PATCH v6 3/3] KVM: MMU: prefetch ptes when intercepted guest #PF

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Marcelo Tosatti <mtosatti@redhat.com>
To: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Avi Kivity <avi@redhat.com>, LKML <linux-kernel@vger.kernel.org>,
	KVM list <kvm@vger.kernel.org>
Subject: Re: [PATCH v6 3/3] KVM: MMU: prefetch ptes when intercepted guest #PF
Date: Thu, 5 Aug 2010 11:38:50 -0300	[thread overview]
Message-ID: <20100805143850.GA1648@amt.cnet> (raw)
In-Reply-To: <4C58DF89.4080404@cn.fujitsu.com>

On Wed, Aug 04, 2010 at 11:33:29AM +0800, Xiao Guangrong wrote:
> Support prefetch ptes when intercept guest #PF, avoid to #PF by later
> access
> 
> If we meet any failure in the prefetch path, we will exit it and
> not try other ptes to avoid become heavy path
> 
> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
> ---
>  arch/x86/kvm/mmu.c         |  107 +++++++++++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/paging_tmpl.h |   81 ++++++++++++++++++++++++++++++++-
>  2 files changed, 184 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 9c69725..4501734 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -89,6 +89,8 @@ module_param(oos_shadow, bool, 0644);
>  	}
>  #endif
>  
> +#define PTE_PREFETCH_NUM		8
> +
>  #define PT_FIRST_AVAIL_BITS_SHIFT 9
>  #define PT64_SECOND_AVAIL_BITS_SHIFT 52
>  
> @@ -403,7 +405,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
>  	if (r)
>  		goto out;
>  	r = mmu_topup_memory_cache(&vcpu->arch.mmu_rmap_desc_cache,
> -				   rmap_desc_cache, 4);
> +				   rmap_desc_cache, 4 + PTE_PREFETCH_NUM);
>  	if (r)
>  		goto out;
>  	r = mmu_topup_memory_cache_page(&vcpu->arch.mmu_page_cache, 8);
> @@ -2090,6 +2092,108 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
>  {
>  }
>  
> +static struct kvm_memory_slot *
> +pte_prefetch_gfn_to_memslot(struct kvm_vcpu *vcpu, gfn_t gfn, bool no_dirty_log)
> +{
> +	struct kvm_memory_slot *slot;
> +
> +	slot = gfn_to_memslot(vcpu->kvm, gfn);
> +	if (!slot || slot->flags & KVM_MEMSLOT_INVALID ||
> +	      (no_dirty_log && slot->dirty_bitmap))
> +		slot = NULL;

Why is this no_dirty_log optimization worthwhile?

> +
> +	return slot;
> +}
> +
> +static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
> +				     bool no_dirty_log)
> +{
> +	struct kvm_memory_slot *slot;
> +	unsigned long hva;
> +
> +	slot = pte_prefetch_gfn_to_memslot(vcpu, gfn, no_dirty_log);
> +	if (!slot) {
> +		get_page(bad_page);
> +		return page_to_pfn(bad_page);
> +	}
> +
> +	hva = gfn_to_hva_memslot(slot, gfn);
> +
> +	return hva_to_pfn_atomic(vcpu->kvm, hva);
> +}
> +
> +static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
> +				    struct kvm_mmu_page *sp,
> +				    u64 *start, u64 *end)
> +{
> +	struct page *pages[PTE_PREFETCH_NUM];
> +	struct kvm_memory_slot *slot;
> +	unsigned hva, access = sp->role.access;
> +	int i, ret, npages = end - start;
> +	gfn_t gfn;
> +
> +	gfn = kvm_mmu_page_get_gfn(sp, start - sp->spt);
> +	slot = pte_prefetch_gfn_to_memslot(vcpu, gfn, access & ACC_WRITE_MASK);
> +	if (!slot || slot->npages - (gfn - slot->base_gfn) != npages)
> +		return -1;
> +
> +	hva = gfn_to_hva_memslot(slot, gfn);
> +	ret = __get_user_pages_fast(hva, npages, 1, pages);
> +	if (ret <= 0)
> +		return -1;

Better to do one at a time with hva_to_pfn_atomic. Or, if you measure that
it's worthwhile, do it in a separate patch (using a helper as discussed
previously).

> +
> +	for (i = 0; i < ret; i++, gfn++, start++)
> +		mmu_set_spte(vcpu, start, ACC_ALL,
> +			     access, 0, 0, 1, NULL,
> +			     sp->role.level, gfn,
> +			     page_to_pfn(pages[i]), true, true);
> +
> +	return ret == npages ? 0 : -1;
> +}
> +
> +static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
> +				  struct kvm_mmu_page *sp, u64 *sptep)
> +{
> +	u64 *spte, *start = NULL;
> +	int i;
> +
> +	WARN_ON(!sp->role.direct);
> +
> +	i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1);
> +	spte = sp->spt + i;
> +
> +	for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) {
> +		if (*spte != shadow_trap_nonpresent_pte || spte == sptep) {
> +			if (!start)
> +				continue;
> +			if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0)
> +				break;
> +			start = NULL;
> +		} else if (!start)
> +			start = spte;
> +	}
> +}
> +
> +static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
> +{
> +	struct kvm_mmu_page *sp;
> +
> +	/*
> +	 * Since it's no accessed bit on EPT, it's no way to
> +	 * distinguish between actually accessed translations
> +	 * and prefetched, so disable pte prefetch if EPT is
> +	 * enabled.
> +	 */
> +	if (!shadow_accessed_mask)
> +		return;
> +
> +	sp = page_header(__pa(sptep));
> +	if (sp->role.level > PT_PAGE_TABLE_LEVEL)
> +		return;
> +
> +	__direct_pte_prefetch(vcpu, sp, sptep);
> +}
> +
>  static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
>  			int level, gfn_t gfn, pfn_t pfn)
>  {
> @@ -2103,6 +2207,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
>  			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
>  				     0, write, 1, &pt_write,
>  				     level, gfn, pfn, false, true);
> +			direct_pte_prefetch(vcpu, iterator.sptep);
>  			++vcpu->stat.pf_fixed;
>  			break;
>  		}
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 51ef909..114c17d 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -67,6 +67,7 @@ struct guest_walker {
>  	int level;
>  	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
>  	pt_element_t ptes[PT_MAX_FULL_LEVELS];
> +	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
>  	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
>  	unsigned pt_access;
>  	unsigned pte_access;
> @@ -302,14 +303,87 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
>  static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu,
>  				struct guest_walker *gw, int level)
>  {
> -	int r;
>  	pt_element_t curr_pte;
> -
> -	r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1],
> +	gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1];
> +	u64 mask;
> +	int r, index;
> +
> +	if (level == PT_PAGE_TABLE_LEVEL) {
> +		mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1;
> +		base_gpa = pte_gpa & ~mask;
> +		index = (pte_gpa - base_gpa) / sizeof(pt_element_t);
> +
> +		r = kvm_read_guest_atomic(vcpu->kvm, base_gpa,
> +				gw->prefetch_ptes, sizeof(gw->prefetch_ptes));
> +		curr_pte = gw->prefetch_ptes[index];

This can slow down a single non-prefetchable pte fault. Maybe it's
irrelevant, but please have kvm_read_guest_atomic in the first patch and
then optimize later; it's easier to review and bisectable.

next prev parent reply	other threads:[~2010-08-05 14:39 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-08-04  3:28 [PATCH v6 1/3] export __get_user_pages_fast() function Xiao Guangrong
2010-08-04  3:31 ` [PATCH v6 2/3] KVM: MMU: introduce hva_to_pfn_atomic function Xiao Guangrong
2010-08-04  3:33   ` [PATCH v6 3/3] KVM: MMU: prefetch ptes when intercepted guest #PF Xiao Guangrong
2010-08-05 14:38     ` Marcelo Tosatti [this message]
2010-08-16  1:37       ` Xiao Guangrong
2010-08-16 15:43         ` Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100805143850.GA1648@amt.cnet \
    --to=mtosatti@redhat.com \
    --cc=avi@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=xiaoguangrong@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.