From mboxrd@z Thu Jan 1 00:00:00 1970 From: Nathan Whitehorn Date: Mon, 27 Jul 2015 17:31:52 +0000 Subject: Re: [PATCH 3/3] KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD Message-Id: <55B66B08.5070202@freebsd.org> List-Id: References: <1429853947-30681-1-git-send-email-paulus@samba.org> <1429853947-30681-4-git-send-email-paulus@samba.org> In-Reply-To: <1429853947-30681-4-git-send-email-paulus@samba.org> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit To: Paul Mackerras , kvm-ppc@vger.kernel.org, kvm@vger.kernel.org I've been running with these patches and a FreeBSD guest for a while now and they work very well, providing big performance improvements in tight memory situations. Thanks! I did have to patch QEMU to enable the hypercalls, however -- it might be worth adding them to the default set since these calls are mandated by PAPR as part of the base hypercall interface. -Nathan On 04/23/15 22:39, Paul Mackerras wrote: > This adds implementations for the H_CLEAR_REF (test and clear reference > bit) and H_CLEAR_MOD (test and clear changed bit) hypercalls. > > When clearing the reference or change bit in the guest view of the HPTE, > we also have to clear it in the real HPTE so that we can detect future > references or changes. When we do so, we transfer the R or C bit value > to the rmap entry for the underlying host page so that kvm_age_hva_hv(), > kvm_test_age_hva_hv() and kvmppc_hv_get_dirty_log() know that the page > has been referenced and/or changed. > > These hypercalls are not used by Linux guests and these implementations > are only compile tested. > > Signed-off-by: Paul Mackerras > --- > arch/powerpc/kvm/book3s_hv_rm_mmu.c | 126 ++++++++++++++++++++++++++++++-- > arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 +- > 2 files changed, 121 insertions(+), 9 deletions(-) > > diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c > index 24ccc79..479ff7e 100644 > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c > @@ -109,25 +109,38 @@ void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize) > } > EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change); > > +/* Returns a pointer to the revmap entry for the page mapped by a HPTE */ > +static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v, > + unsigned long hpte_gr) > +{ > + struct kvm_memory_slot *memslot; > + unsigned long *rmap; > + unsigned long gfn; > + > + gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr)); > + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); > + if (!memslot) > + return NULL; > + > + rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); > + return rmap; > +} > + > /* Remove this HPTE from the chain for a real page */ > static void remove_revmap_chain(struct kvm *kvm, long pte_index, > struct revmap_entry *rev, > unsigned long hpte_v, unsigned long hpte_r) > { > struct revmap_entry *next, *prev; > - unsigned long gfn, ptel, head; > - struct kvm_memory_slot *memslot; > + unsigned long ptel, head; > unsigned long *rmap; > unsigned long rcbits; > > rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); > ptel = rev->guest_rpte |= rcbits; > - gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); > - memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); > - if (!memslot) > + rmap = revmap_for_hpte(kvm, hpte_v, ptel); > + if (!rmap) > return; > - > - rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); > lock_rmap(rmap); > > head = *rmap & KVMPPC_RMAP_INDEX; > @@ -662,6 +675,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, > return H_SUCCESS; > } > > +long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, > + unsigned long pte_index) > +{ > + struct kvm *kvm = vcpu->kvm; > + __be64 *hpte; > + unsigned long v, r, gr; > + struct revmap_entry *rev; > + unsigned long *rmap; > + long ret = H_NOT_FOUND; > + > + if (pte_index >= kvm->arch.hpt_npte) > + return H_PARAMETER; > + > + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); > + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); > + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) > + cpu_relax(); > + v = be64_to_cpu(hpte[0]); > + r = be64_to_cpu(hpte[1]); > + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) > + goto out; > + > + gr = rev->guest_rpte; > + if (rev->guest_rpte & HPTE_R_R) { > + rev->guest_rpte &= ~HPTE_R_R; > + note_hpte_modification(kvm, rev); > + } > + if (v & HPTE_V_VALID) { > + gr |= r & (HPTE_R_R | HPTE_R_C); > + if (r & HPTE_R_R) { > + kvmppc_clear_ref_hpte(kvm, hpte, pte_index); > + rmap = revmap_for_hpte(kvm, v, gr); > + if (rmap) { > + lock_rmap(rmap); > + *rmap |= KVMPPC_RMAP_REFERENCED; > + unlock_rmap(rmap); > + } > + } > + } > + vcpu->arch.gpr[4] = gr; > + ret = H_SUCCESS; > + out: > + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); > + return ret; > +} > + > +long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, > + unsigned long pte_index) > +{ > + struct kvm *kvm = vcpu->kvm; > + __be64 *hpte; > + unsigned long v, r, gr; > + struct revmap_entry *rev; > + unsigned long *rmap; > + long ret = H_NOT_FOUND; > + > + if (pte_index >= kvm->arch.hpt_npte) > + return H_PARAMETER; > + > + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); > + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); > + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) > + cpu_relax(); > + v = be64_to_cpu(hpte[0]); > + r = be64_to_cpu(hpte[1]); > + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) > + goto out; > + > + gr = rev->guest_rpte; > + if (gr & HPTE_R_C) { > + rev->guest_rpte &= ~HPTE_R_C; > + note_hpte_modification(kvm, rev); > + } > + if (v & HPTE_V_VALID) { > + /* need to make it temporarily absent so C is stable */ > + hpte[0] |= cpu_to_be64(HPTE_V_ABSENT); > + kvmppc_invalidate_hpte(kvm, hpte, pte_index); > + r = be64_to_cpu(hpte[1]); > + gr |= r & (HPTE_R_R | HPTE_R_C); > + if (r & HPTE_R_C) { > + unsigned long psize = hpte_page_size(v, r); > + hpte[1] = cpu_to_be64(r & ~HPTE_R_C); > + eieio(); > + rmap = revmap_for_hpte(kvm, v, gr); > + if (rmap) { > + lock_rmap(rmap); > + *rmap |= KVMPPC_RMAP_CHANGED; > + kvmppc_update_rmap_change(rmap, psize); > + unlock_rmap(rmap); > + } > + } > + } > + vcpu->arch.gpr[4] = gr; > + ret = H_SUCCESS; > + out: > + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); > + return ret; > +} > + > void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, > unsigned long pte_index) > { > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > index 4d70df2..52752a3 100644 > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > @@ -1816,8 +1816,8 @@ hcall_real_table: > .long DOTSYM(kvmppc_h_remove) - hcall_real_table > .long DOTSYM(kvmppc_h_enter) - hcall_real_table > .long DOTSYM(kvmppc_h_read) - hcall_real_table > - .long 0 /* 0x10 - H_CLEAR_MOD */ > - .long 0 /* 0x14 - H_CLEAR_REF */ > + .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table > + .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table > .long DOTSYM(kvmppc_h_protect) - hcall_real_table > .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table > .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table From mboxrd@z Thu Jan 1 00:00:00 1970 From: Nathan Whitehorn Subject: Re: [PATCH 3/3] KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD Date: Mon, 27 Jul 2015 10:31:52 -0700 Message-ID: <55B66B08.5070202@freebsd.org> References: <1429853947-30681-1-git-send-email-paulus@samba.org> <1429853947-30681-4-git-send-email-paulus@samba.org> Mime-Version: 1.0 Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: 7bit To: Paul Mackerras , kvm-ppc@vger.kernel.org, kvm@vger.kernel.org Return-path: Received: from c.mail.sonic.net ([64.142.111.80]:59763 "EHLO c.mail.sonic.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1750803AbbG0RnA (ORCPT ); Mon, 27 Jul 2015 13:43:00 -0400 In-Reply-To: <1429853947-30681-4-git-send-email-paulus@samba.org> Sender: kvm-owner@vger.kernel.org List-ID: I've been running with these patches and a FreeBSD guest for a while now and they work very well, providing big performance improvements in tight memory situations. Thanks! I did have to patch QEMU to enable the hypercalls, however -- it might be worth adding them to the default set since these calls are mandated by PAPR as part of the base hypercall interface. -Nathan On 04/23/15 22:39, Paul Mackerras wrote: > This adds implementations for the H_CLEAR_REF (test and clear reference > bit) and H_CLEAR_MOD (test and clear changed bit) hypercalls. > > When clearing the reference or change bit in the guest view of the HPTE, > we also have to clear it in the real HPTE so that we can detect future > references or changes. When we do so, we transfer the R or C bit value > to the rmap entry for the underlying host page so that kvm_age_hva_hv(), > kvm_test_age_hva_hv() and kvmppc_hv_get_dirty_log() know that the page > has been referenced and/or changed. > > These hypercalls are not used by Linux guests and these implementations > are only compile tested. > > Signed-off-by: Paul Mackerras > --- > arch/powerpc/kvm/book3s_hv_rm_mmu.c | 126 ++++++++++++++++++++++++++++++-- > arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 +- > 2 files changed, 121 insertions(+), 9 deletions(-) > > diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c > index 24ccc79..479ff7e 100644 > --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c > +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c > @@ -109,25 +109,38 @@ void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize) > } > EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change); > > +/* Returns a pointer to the revmap entry for the page mapped by a HPTE */ > +static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v, > + unsigned long hpte_gr) > +{ > + struct kvm_memory_slot *memslot; > + unsigned long *rmap; > + unsigned long gfn; > + > + gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr)); > + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); > + if (!memslot) > + return NULL; > + > + rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); > + return rmap; > +} > + > /* Remove this HPTE from the chain for a real page */ > static void remove_revmap_chain(struct kvm *kvm, long pte_index, > struct revmap_entry *rev, > unsigned long hpte_v, unsigned long hpte_r) > { > struct revmap_entry *next, *prev; > - unsigned long gfn, ptel, head; > - struct kvm_memory_slot *memslot; > + unsigned long ptel, head; > unsigned long *rmap; > unsigned long rcbits; > > rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); > ptel = rev->guest_rpte |= rcbits; > - gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); > - memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn); > - if (!memslot) > + rmap = revmap_for_hpte(kvm, hpte_v, ptel); > + if (!rmap) > return; > - > - rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]); > lock_rmap(rmap); > > head = *rmap & KVMPPC_RMAP_INDEX; > @@ -662,6 +675,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, > return H_SUCCESS; > } > > +long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags, > + unsigned long pte_index) > +{ > + struct kvm *kvm = vcpu->kvm; > + __be64 *hpte; > + unsigned long v, r, gr; > + struct revmap_entry *rev; > + unsigned long *rmap; > + long ret = H_NOT_FOUND; > + > + if (pte_index >= kvm->arch.hpt_npte) > + return H_PARAMETER; > + > + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); > + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); > + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) > + cpu_relax(); > + v = be64_to_cpu(hpte[0]); > + r = be64_to_cpu(hpte[1]); > + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) > + goto out; > + > + gr = rev->guest_rpte; > + if (rev->guest_rpte & HPTE_R_R) { > + rev->guest_rpte &= ~HPTE_R_R; > + note_hpte_modification(kvm, rev); > + } > + if (v & HPTE_V_VALID) { > + gr |= r & (HPTE_R_R | HPTE_R_C); > + if (r & HPTE_R_R) { > + kvmppc_clear_ref_hpte(kvm, hpte, pte_index); > + rmap = revmap_for_hpte(kvm, v, gr); > + if (rmap) { > + lock_rmap(rmap); > + *rmap |= KVMPPC_RMAP_REFERENCED; > + unlock_rmap(rmap); > + } > + } > + } > + vcpu->arch.gpr[4] = gr; > + ret = H_SUCCESS; > + out: > + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); > + return ret; > +} > + > +long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, > + unsigned long pte_index) > +{ > + struct kvm *kvm = vcpu->kvm; > + __be64 *hpte; > + unsigned long v, r, gr; > + struct revmap_entry *rev; > + unsigned long *rmap; > + long ret = H_NOT_FOUND; > + > + if (pte_index >= kvm->arch.hpt_npte) > + return H_PARAMETER; > + > + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]); > + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4)); > + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) > + cpu_relax(); > + v = be64_to_cpu(hpte[0]); > + r = be64_to_cpu(hpte[1]); > + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT))) > + goto out; > + > + gr = rev->guest_rpte; > + if (gr & HPTE_R_C) { > + rev->guest_rpte &= ~HPTE_R_C; > + note_hpte_modification(kvm, rev); > + } > + if (v & HPTE_V_VALID) { > + /* need to make it temporarily absent so C is stable */ > + hpte[0] |= cpu_to_be64(HPTE_V_ABSENT); > + kvmppc_invalidate_hpte(kvm, hpte, pte_index); > + r = be64_to_cpu(hpte[1]); > + gr |= r & (HPTE_R_R | HPTE_R_C); > + if (r & HPTE_R_C) { > + unsigned long psize = hpte_page_size(v, r); > + hpte[1] = cpu_to_be64(r & ~HPTE_R_C); > + eieio(); > + rmap = revmap_for_hpte(kvm, v, gr); > + if (rmap) { > + lock_rmap(rmap); > + *rmap |= KVMPPC_RMAP_CHANGED; > + kvmppc_update_rmap_change(rmap, psize); > + unlock_rmap(rmap); > + } > + } > + } > + vcpu->arch.gpr[4] = gr; > + ret = H_SUCCESS; > + out: > + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK); > + return ret; > +} > + > void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, > unsigned long pte_index) > { > diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > index 4d70df2..52752a3 100644 > --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S > +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S > @@ -1816,8 +1816,8 @@ hcall_real_table: > .long DOTSYM(kvmppc_h_remove) - hcall_real_table > .long DOTSYM(kvmppc_h_enter) - hcall_real_table > .long DOTSYM(kvmppc_h_read) - hcall_real_table > - .long 0 /* 0x10 - H_CLEAR_MOD */ > - .long 0 /* 0x14 - H_CLEAR_REF */ > + .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table > + .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table > .long DOTSYM(kvmppc_h_protect) - hcall_real_table > .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table > .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table