From mboxrd@z Thu Jan 1 00:00:00 1970
From: Nathan Whitehorn
Subject: Re: [PATCH 3/3] KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD
Date: Sun, 6 Sep 2015 12:47:12 -0700
Message-ID: <55EC9840.1010701@freebsd.org>
References: <1429853947-30681-1-git-send-email-paulus@samba.org>
 <1429853947-30681-4-git-send-email-paulus@samba.org>
 <55B66B08.5070202@freebsd.org>
Mime-Version: 1.0
Content-Type: text/plain; charset=windows-1252; format=flowed
Content-Transfer-Encoding: 7bit
To: Paul Mackerras , kvm-ppc@vger.kernel.org, kvm@vger.kernel.org
Return-path: 
Received: from d.mail.sonic.net ([64.142.111.50]:54063 "EHLO d.mail.sonic.net"
 rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751262AbbIFT6I
 (ORCPT ); Sun, 6 Sep 2015 15:58:08 -0400
In-Reply-To: <55B66B08.5070202@freebsd.org>
Sender: kvm-owner@vger.kernel.org
List-ID: 

Anything I can do to help move these along? It's a big performance
improvement for FreeBSD guests.
-Nathan

On 07/27/15 10:31, Nathan Whitehorn wrote:
> I've been running with these patches and a FreeBSD guest for a while
> now and they work very well, providing big performance improvements in
> tight memory situations. Thanks! I did have to patch QEMU to enable
> the hypercalls, however -- it might be worth adding them to the
> default set since these calls are mandated by PAPR as part of the base
> hypercall interface.
> -Nathan
>
> On 04/23/15 22:39, Paul Mackerras wrote:
>> This adds implementations for the H_CLEAR_REF (test and clear reference
>> bit) and H_CLEAR_MOD (test and clear changed bit) hypercalls.
>>
>> When clearing the reference or change bit in the guest view of the HPTE,
>> we also have to clear it in the real HPTE so that we can detect future
>> references or changes. When we do so, we transfer the R or C bit value
>> to the rmap entry for the underlying host page so that kvm_age_hva_hv(),
>> kvm_test_age_hva_hv() and kvmppc_hv_get_dirty_log() know that the page
>> has been referenced and/or changed.
>>
>> These hypercalls are not used by Linux guests and these implementations
>> are only compile tested.
>>
>> Signed-off-by: Paul Mackerras
>> ---
>>   arch/powerpc/kvm/book3s_hv_rm_mmu.c     | 126 ++++++++++++++++++++++++++++++--
>>   arch/powerpc/kvm/book3s_hv_rmhandlers.S |   4 +-
>>   2 files changed, 121 insertions(+), 9 deletions(-)
>>
>> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
>> index 24ccc79..479ff7e 100644
>> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
>> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
>> @@ -109,25 +109,38 @@ void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
>>  }
>>  EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
>> +/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
>> +static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
>> +                                      unsigned long hpte_gr)
>> +{
>> +        struct kvm_memory_slot *memslot;
>> +        unsigned long *rmap;
>> +        unsigned long gfn;
>> +
>> +        gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
>> +        memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
>> +        if (!memslot)
>> +                return NULL;
>> +
>> +        rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
>> +        return rmap;
>> +}
>> +
>>  /* Remove this HPTE from the chain for a real page */
>>  static void remove_revmap_chain(struct kvm *kvm, long pte_index,
>>                                  struct revmap_entry *rev,
>>                                  unsigned long hpte_v, unsigned long hpte_r)
>>  {
>>          struct revmap_entry *next, *prev;
>> -        unsigned long gfn, ptel, head;
>> -        struct kvm_memory_slot *memslot;
>> +        unsigned long ptel, head;
>>          unsigned long *rmap;
>>          unsigned long rcbits;
>>          rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
>>          ptel = rev->guest_rpte |= rcbits;
>> -        gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
>> -        memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
>> -        if (!memslot)
>> +        rmap = revmap_for_hpte(kvm, hpte_v, ptel);
>> +        if (!rmap)
>>                  return;
>> -
>> -        rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
>>          lock_rmap(rmap);
>>          head = *rmap & KVMPPC_RMAP_INDEX;
>> @@ -662,6 +675,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
>>          return H_SUCCESS;
>>  }
>> +long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
>> +                        unsigned long pte_index)
>> +{
>> +        struct kvm *kvm = vcpu->kvm;
>> +        __be64 *hpte;
>> +        unsigned long v, r, gr;
>> +        struct revmap_entry *rev;
>> +        unsigned long *rmap;
>> +        long ret = H_NOT_FOUND;
>> +
>> +        if (pte_index >= kvm->arch.hpt_npte)
>> +                return H_PARAMETER;
>> +
>> +        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
>> +        hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
>> +        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
>> +                cpu_relax();
>> +        v = be64_to_cpu(hpte[0]);
>> +        r = be64_to_cpu(hpte[1]);
>> +        if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
>> +                goto out;
>> +
>> +        gr = rev->guest_rpte;
>> +        if (rev->guest_rpte & HPTE_R_R) {
>> +                rev->guest_rpte &= ~HPTE_R_R;
>> +                note_hpte_modification(kvm, rev);
>> +        }
>> +        if (v & HPTE_V_VALID) {
>> +                gr |= r & (HPTE_R_R | HPTE_R_C);
>> +                if (r & HPTE_R_R) {
>> +                        kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
>> +                        rmap = revmap_for_hpte(kvm, v, gr);
>> +                        if (rmap) {
>> +                                lock_rmap(rmap);
>> +                                *rmap |= KVMPPC_RMAP_REFERENCED;
>> +                                unlock_rmap(rmap);
>> +                        }
>> +                }
>> +        }
>> +        vcpu->arch.gpr[4] = gr;
>> +        ret = H_SUCCESS;
>> + out:
>> +        unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
>> +        return ret;
>> +}
>> +
>> +long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
>> +                        unsigned long pte_index)
>> +{
>> +        struct kvm *kvm = vcpu->kvm;
>> +        __be64 *hpte;
>> +        unsigned long v, r, gr;
>> +        struct revmap_entry *rev;
>> +        unsigned long *rmap;
>> +        long ret = H_NOT_FOUND;
>> +
>> +        if (pte_index >= kvm->arch.hpt_npte)
>> +                return H_PARAMETER;
>> +
>> +        rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
>> +        hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
>> +        while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
>> +                cpu_relax();
>> +        v = be64_to_cpu(hpte[0]);
>> +        r = be64_to_cpu(hpte[1]);
>> +        if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
>> +                goto out;
>> +
>> +        gr = rev->guest_rpte;
>> +        if (gr & HPTE_R_C) {
>> +                rev->guest_rpte &= ~HPTE_R_C;
>> +                note_hpte_modification(kvm, rev);
>> +        }
>> +        if (v & HPTE_V_VALID) {
>> +                /* need to make it temporarily absent so C is stable */
>> +                hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
>> +                kvmppc_invalidate_hpte(kvm, hpte, pte_index);
>> +                r = be64_to_cpu(hpte[1]);
>> +                gr |= r & (HPTE_R_R | HPTE_R_C);
>> +                if (r & HPTE_R_C) {
>> +                        unsigned long psize = hpte_page_size(v, r);
>> +                        hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
>> +                        eieio();
>> +                        rmap = revmap_for_hpte(kvm, v, gr);
>> +                        if (rmap) {
>> +                                lock_rmap(rmap);
>> +                                *rmap |= KVMPPC_RMAP_CHANGED;
>> +                                kvmppc_update_rmap_change(rmap, psize);
>> +                                unlock_rmap(rmap);
>> +                        }
>> +                }
>> +        }
>> +        vcpu->arch.gpr[4] = gr;
>> +        ret = H_SUCCESS;
>> + out:
>> +        unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
>> +        return ret;
>> +}
>> +
>>  void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
>>                          unsigned long pte_index)
>>  {
>> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> index 4d70df2..52752a3 100644
>> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
>> @@ -1816,8 +1816,8 @@ hcall_real_table:
>>          .long   DOTSYM(kvmppc_h_remove) - hcall_real_table
>>          .long   DOTSYM(kvmppc_h_enter) - hcall_real_table
>>          .long   DOTSYM(kvmppc_h_read) - hcall_real_table
>> -        .long   0               /* 0x10 - H_CLEAR_MOD */
>> -        .long   0               /* 0x14 - H_CLEAR_REF */
>> +        .long   DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
>> +        .long   DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
>>          .long   DOTSYM(kvmppc_h_protect) - hcall_real_table
>>          .long   DOTSYM(kvmppc_h_get_tce) - hcall_real_table
>>          .long   DOTSYM(kvmppc_h_put_tce) - hcall_real_table
>
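
For anyone who wants to try these before QEMU turns them on by default: the
hcalls can be switched on per-VM from userspace through the
KVM_CAP_PPC_ENABLE_HCALL capability. Below is a minimal sketch only, not
QEMU's actual code; the enable_hcall() helper and the vmfd descriptor are
made up for illustration, and the tokens 0x10/0x14 are the
H_CLEAR_MOD/H_CLEAR_REF values from the hcall_real_table comments above.

/*
 * Sketch only: enable the H_CLEAR_REF/H_CLEAR_MOD hypercalls for one VM
 * via KVM_CAP_PPC_ENABLE_HCALL.  "vmfd" is assumed to be a VM file
 * descriptor obtained earlier from KVM_CREATE_VM; a real VMM would check
 * KVM_CHECK_EXTENSION for the capability before using it.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int enable_hcall(int vmfd, unsigned long token)
{
        struct kvm_enable_cap cap;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_PPC_ENABLE_HCALL;
        cap.args[0] = token;    /* hypercall token, e.g. 0x14 for H_CLEAR_REF */
        cap.args[1] = 1;        /* 1 = enable, 0 = disable */
        return ioctl(vmfd, KVM_ENABLE_CAP, &cap);
}

Calling enable_hcall(vmfd, 0x10) and enable_hcall(vmfd, 0x14) once after
creating the VM is enough. Once enabled, the hypercalls hand the old R or C
value back to the guest in R4 (vcpu->arch.gpr[4] in the patch), so the guest
gets test-and-clear in a single call.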