From: Marcelo Tosatti <mtosatti@redhat.com>
To: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
Cc: Avi Kivity <avi@redhat.com>, LKML <linux-kernel@vger.kernel.org>,
KVM list <kvm@vger.kernel.org>
Subject: Re: [PATCH v3 9/11] KVM: MMU: prefetch ptes when intercepted guest #PF
Date: Wed, 30 Jun 2010 17:43:24 -0300 [thread overview]
Message-ID: <20100630204324.GA5366@amt.cnet> (raw)
In-Reply-To: <4C2AFB65.2030807@cn.fujitsu.com>
On Wed, Jun 30, 2010 at 04:08:05PM +0800, Xiao Guangrong wrote:
> Support prefetch ptes when intercept guest #PF, avoid to #PF by later
> access
>
> If we meet any failure in the prefetch path, we will exit it and
> not try other ptes to avoid become heavy path
>
> Note: this speculative will mark page become dirty but it not really
> accessed, the same issue is in other speculative paths like invlpg,
> pte write, fortunately, it just affect host memory management. After
> Avi's patchset named "[PATCH v2 1/4] KVM: MMU: Introduce drop_spte()"
> merged, we will easily fix it. Will do it in the future.
>
> Signed-off-by: Xiao Guangrong <xiaoguangrong@cn.fujitsu.com>
> ---
> arch/x86/kvm/mmu.c | 83 ++++++++++++++++++++++++++++++++++++++++++++
> arch/x86/kvm/paging_tmpl.h | 76 ++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 159 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 6673484..fadfafe 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2002,6 +2002,88 @@ static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
> {
> }
>
> +static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
> + struct kvm_mmu_page *sp,
> + u64 *start, u64 *end)
> +{
> + gfn_t gfn;
> + struct page *pages[PTE_PREFETCH_NUM];
> +
> + gfn = sp->gfn + start - sp->spt;
> + while (start < end) {
> + unsigned long addr;
> + int entry, j, ret;
> +
> + addr = gfn_to_hva_many(vcpu->kvm, gfn, &entry);
> + if (kvm_is_error_hva(addr))
> + return -1;
> +
> + entry = min(entry, (int)(end - start));
> + ret = __get_user_pages_fast(addr, entry, 1, pages);
> + if (ret <= 0)
> + return -1;
Why can't you use gfn_to_pfn_atomic() here, one page at a time? Is
the overhead so significant that this is worthwhile?
You're bypassing the centralized interface.
> +
> + for (j = 0; j < ret; j++, gfn++, start++)
> + mmu_set_spte(vcpu, start, ACC_ALL,
> + sp->role.access, 0, 0, 1, NULL,
> + sp->role.level, gfn,
> + page_to_pfn(pages[j]), true, false);
> +
> + if (ret < entry)
> + return -1;
> + }
> + return 0;
> +}
> +
> +static void __direct_pte_prefetch(struct kvm_vcpu *vcpu,
> + struct kvm_mmu_page *sp, u64 *sptep)
> +{
> + u64 *start = NULL;
> + int index, i, max;
> +
> + WARN_ON(!sp->role.direct);
> +
> + if (pte_prefetch_topup_memory_cache(vcpu))
> + return;
> +
> + index = sptep - sp->spt;
> + i = index & ~(PTE_PREFETCH_NUM - 1);
> + max = index | (PTE_PREFETCH_NUM - 1);
> +
> + for (; i < max; i++) {
> + u64 *spte = sp->spt + i;
> +
> + if (*spte != shadow_trap_nonpresent_pte || spte == sptep) {
> + if (!start)
> + continue;
> + if (direct_pte_prefetch_many(vcpu, sp, start, spte) < 0)
> + break;
> + start = NULL;
> + } else if (!start)
> + start = spte;
> + }
> +}
> +
> +static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
> +{
> + struct kvm_mmu_page *sp;
> +
> + /*
> + * Since it's no accessed bit on EPT, it's no way to
> + * distinguish between actually accessed translations
> + * and prefetched, so disable pte prefetch if EPT is
> + * enabled.
> + */
> + if (!shadow_accessed_mask)
> + return;
> +
> + sp = page_header(__pa(sptep));
> + if (sp->role.level > PT_PAGE_TABLE_LEVEL)
> + return;
> +
> + __direct_pte_prefetch(vcpu, sp, sptep);
> +}
> +
> static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
> int level, gfn_t gfn, pfn_t pfn)
> {
> @@ -2015,6 +2097,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
> mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL,
> 0, write, 1, &pt_write,
> level, gfn, pfn, false, true);
> + direct_pte_prefetch(vcpu, iterator.sptep);
> ++vcpu->stat.pf_fixed;
> break;
> }
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 3350c02..d8c3be8 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -291,6 +291,81 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
> gpte_to_gfn(gpte), pfn, true, true);
> }
>
> +static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, u64 *sptep)
> +{
> + struct kvm_mmu_page *sp;
> + pt_element_t gptep[PTE_PREFETCH_NUM];
> + gpa_t first_pte_gpa;
> + int offset = 0, index, i, j, max;
> +
> + sp = page_header(__pa(sptep));
> + index = sptep - sp->spt;
> +
> + if (sp->role.level > PT_PAGE_TABLE_LEVEL)
> + return;
> +
> + if (sp->role.direct)
> + return __direct_pte_prefetch(vcpu, sp, sptep);
> +
> + index = sptep - sp->spt;
> + i = index & ~(PTE_PREFETCH_NUM - 1);
> + max = index | (PTE_PREFETCH_NUM - 1);
> +
> + if (PTTYPE == 32)
> + offset = sp->role.quadrant << PT64_LEVEL_BITS;
> +
> + first_pte_gpa = gfn_to_gpa(sp->gfn) +
> + (offset + i) * sizeof(pt_element_t);
> +
> + if (kvm_read_guest_atomic(vcpu->kvm, first_pte_gpa, gptep,
> + sizeof(gptep)) < 0)
> + return;
> +
> + if (pte_prefetch_topup_memory_cache(vcpu))
> + return;
> +
> + for (j = 0; i < max; i++, j++) {
> + pt_element_t gpte;
> + unsigned pte_access;
> + u64 *spte = sp->spt + i;
> + gfn_t gfn;
> + pfn_t pfn;
> +
> + if (spte == sptep)
> + continue;
> +
> + if (*spte != shadow_trap_nonpresent_pte)
> + continue;
> +
> + gpte = gptep[j];
> +
> + if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL))
> + break;
> +
> + if (!(gpte & PT_ACCESSED_MASK))
> + continue;
> +
> + if (!is_present_gpte(gpte)) {
> + if (!sp->unsync)
> + __set_spte(spte, shadow_notrap_nonpresent_pte);
> + continue;
> + }
> +
> + gfn = gpte_to_gfn(gpte);
> +
> + pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);
> + if (is_error_pfn(pfn)) {
> + kvm_release_pfn_clean(pfn);
> + break;
> + }
> +
> + pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
> + mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
> + is_dirty_gpte(gpte), NULL, sp->role.level, gfn,
> + pfn, true, false);
reset_host_protection should be true, see commit 1403283acca (also for
the direct case, to be consistent).
next prev parent reply other threads:[~2010-06-30 20:45 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-06-30 8:02 [PATCH v3 1/11] KVM: MMU: fix writable sync sp mapping Xiao Guangrong
2010-06-30 8:02 ` [PATCH v3 2/11] KVM: MMU: fix conflict access permissions in direct sp Xiao Guangrong
2010-06-30 8:03 ` [PATCH v3 3/11] KVM: MMU: fix direct sp's access corruptted Xiao Guangrong
2010-06-30 19:39 ` Marcelo Tosatti
2010-07-01 0:50 ` Xiao Guangrong
2010-07-01 12:03 ` Marcelo Tosatti
2010-06-30 8:04 ` [PATCH v3 4/11] KVM: MMU: fix forgot to flush all vcpu's tlb Xiao Guangrong
2010-06-30 8:05 ` [PATCH v3 5/11] KVM: MMU: cleanup FNAME(fetch)() functions Xiao Guangrong
2010-07-01 12:05 ` Marcelo Tosatti
2010-06-30 8:05 ` [PATCH v3 6/11] KVM: MMU: introduce gfn_to_pfn_atomic() function Xiao Guangrong
2010-06-30 8:06 ` [PATCH v3 7/11] KVM: MMU: introduce gfn_to_hva_many() function Xiao Guangrong
2010-06-30 8:07 ` [PATCH v3 8/11] KVM: MMU: introduce pte_prefetch_topup_memory_cache() Xiao Guangrong
2010-06-30 8:08 ` [PATCH v3 9/11] KVM: MMU: prefetch ptes when intercepted guest #PF Xiao Guangrong
2010-06-30 20:43 ` Marcelo Tosatti [this message]
2010-07-01 1:11 ` Xiao Guangrong
2010-07-01 12:07 ` Marcelo Tosatti
2010-07-01 12:11 ` Avi Kivity
2010-07-01 12:13 ` Xiao Guangrong
2010-07-01 12:26 ` Marcelo Tosatti
2010-06-30 8:08 ` [PATCH 10/11] KVM: MMU: combine guest pte read between walk and pte prefetch Xiao Guangrong
2010-06-30 8:09 ` [PATCH v3 11/11] KVM: MMU: trace " Xiao Guangrong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100630204324.GA5366@amt.cnet \
--to=mtosatti@redhat.com \
--cc=avi@redhat.com \
--cc=kvm@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=xiaoguangrong@cn.fujitsu.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.