From: Nathan Whitehorn <nwhitehorn@freebsd.org>
To: Paul Mackerras <paulus@samba.org>,
kvm-ppc@vger.kernel.org, kvm@vger.kernel.org
Subject: Re: [PATCH 3/3] KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD
Date: Mon, 27 Jul 2015 17:31:52 +0000 [thread overview]
Message-ID: <55B66B08.5070202@freebsd.org> (raw)
In-Reply-To: <1429853947-30681-4-git-send-email-paulus@samba.org>
I've been running with these patches and a FreeBSD guest for a while now
and they work very well, providing big performance improvements in tight
memory situations. Thanks! I did have to patch QEMU to enable the
hypercalls, however -- it might be worth adding them to the default set
since these calls are mandated by PAPR as part of the base hypercall
interface.
-Nathan
On 04/23/15 22:39, Paul Mackerras wrote:
> This adds implementations for the H_CLEAR_REF (test and clear reference
> bit) and H_CLEAR_MOD (test and clear changed bit) hypercalls.
>
> When clearing the reference or change bit in the guest view of the HPTE,
> we also have to clear it in the real HPTE so that we can detect future
> references or changes. When we do so, we transfer the R or C bit value
> to the rmap entry for the underlying host page so that kvm_age_hva_hv(),
> kvm_test_age_hva_hv() and kvmppc_hv_get_dirty_log() know that the page
> has been referenced and/or changed.
>
> These hypercalls are not used by Linux guests and these implementations
> are only compile tested.
>
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> ---
> arch/powerpc/kvm/book3s_hv_rm_mmu.c | 126 ++++++++++++++++++++++++++++++--
> arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 +-
> 2 files changed, 121 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 24ccc79..479ff7e 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -109,25 +109,38 @@ void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
> }
> EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
>
> +/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
> +static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
> + unsigned long hpte_gr)
> +{
> + struct kvm_memory_slot *memslot;
> + unsigned long *rmap;
> + unsigned long gfn;
> +
> + gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
> + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
> + if (!memslot)
> + return NULL;
> +
> + rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
> + return rmap;
> +}
> +
> /* Remove this HPTE from the chain for a real page */
> static void remove_revmap_chain(struct kvm *kvm, long pte_index,
> struct revmap_entry *rev,
> unsigned long hpte_v, unsigned long hpte_r)
> {
> struct revmap_entry *next, *prev;
> - unsigned long gfn, ptel, head;
> - struct kvm_memory_slot *memslot;
> + unsigned long ptel, head;
> unsigned long *rmap;
> unsigned long rcbits;
>
> rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
> ptel = rev->guest_rpte |= rcbits;
> - gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
> - memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
> - if (!memslot)
> + rmap = revmap_for_hpte(kvm, hpte_v, ptel);
> + if (!rmap)
> return;
> -
> - rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
> lock_rmap(rmap);
>
> head = *rmap & KVMPPC_RMAP_INDEX;
> @@ -662,6 +675,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
> return H_SUCCESS;
> }
>
> +long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
> + unsigned long pte_index)
> +{
> + struct kvm *kvm = vcpu->kvm;
> + __be64 *hpte;
> + unsigned long v, r, gr;
> + struct revmap_entry *rev;
> + unsigned long *rmap;
> + long ret = H_NOT_FOUND;
> +
> + if (pte_index >= kvm->arch.hpt_npte)
> + return H_PARAMETER;
> +
> + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
> + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
> + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
> + cpu_relax();
> + v = be64_to_cpu(hpte[0]);
> + r = be64_to_cpu(hpte[1]);
> + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
> + goto out;
> +
> + gr = rev->guest_rpte;
> + if (rev->guest_rpte & HPTE_R_R) {
> + rev->guest_rpte &= ~HPTE_R_R;
> + note_hpte_modification(kvm, rev);
> + }
> + if (v & HPTE_V_VALID) {
> + gr |= r & (HPTE_R_R | HPTE_R_C);
> + if (r & HPTE_R_R) {
> + kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
> + rmap = revmap_for_hpte(kvm, v, gr);
> + if (rmap) {
> + lock_rmap(rmap);
> + *rmap |= KVMPPC_RMAP_REFERENCED;
> + unlock_rmap(rmap);
> + }
> + }
> + }
> + vcpu->arch.gpr[4] = gr;
> + ret = H_SUCCESS;
> + out:
> + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
> + return ret;
> +}
> +
> +long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
> + unsigned long pte_index)
> +{
> + struct kvm *kvm = vcpu->kvm;
> + __be64 *hpte;
> + unsigned long v, r, gr;
> + struct revmap_entry *rev;
> + unsigned long *rmap;
> + long ret = H_NOT_FOUND;
> +
> + if (pte_index >= kvm->arch.hpt_npte)
> + return H_PARAMETER;
> +
> + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
> + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
> + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
> + cpu_relax();
> + v = be64_to_cpu(hpte[0]);
> + r = be64_to_cpu(hpte[1]);
> + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
> + goto out;
> +
> + gr = rev->guest_rpte;
> + if (gr & HPTE_R_C) {
> + rev->guest_rpte &= ~HPTE_R_C;
> + note_hpte_modification(kvm, rev);
> + }
> + if (v & HPTE_V_VALID) {
> + /* need to make it temporarily absent so C is stable */
> + hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
> + kvmppc_invalidate_hpte(kvm, hpte, pte_index);
> + r = be64_to_cpu(hpte[1]);
> + gr |= r & (HPTE_R_R | HPTE_R_C);
> + if (r & HPTE_R_C) {
> + unsigned long psize = hpte_page_size(v, r);
> + hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
> + eieio();
> + rmap = revmap_for_hpte(kvm, v, gr);
> + if (rmap) {
> + lock_rmap(rmap);
> + *rmap |= KVMPPC_RMAP_CHANGED;
> + kvmppc_update_rmap_change(rmap, psize);
> + unlock_rmap(rmap);
> + }
> + }
> + }
> + vcpu->arch.gpr[4] = gr;
> + ret = H_SUCCESS;
> + out:
> + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
> + return ret;
> +}
> +
> void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
> unsigned long pte_index)
> {
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 4d70df2..52752a3 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -1816,8 +1816,8 @@ hcall_real_table:
> .long DOTSYM(kvmppc_h_remove) - hcall_real_table
> .long DOTSYM(kvmppc_h_enter) - hcall_real_table
> .long DOTSYM(kvmppc_h_read) - hcall_real_table
> - .long 0 /* 0x10 - H_CLEAR_MOD */
> - .long 0 /* 0x14 - H_CLEAR_REF */
> + .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
> + .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
> .long DOTSYM(kvmppc_h_protect) - hcall_real_table
> .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table
> .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table
WARNING: multiple messages have this Message-ID (diff)
From: Nathan Whitehorn <nwhitehorn@freebsd.org>
To: Paul Mackerras <paulus@samba.org>,
kvm-ppc@vger.kernel.org, kvm@vger.kernel.org
Subject: Re: [PATCH 3/3] KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD
Date: Mon, 27 Jul 2015 10:31:52 -0700 [thread overview]
Message-ID: <55B66B08.5070202@freebsd.org> (raw)
In-Reply-To: <1429853947-30681-4-git-send-email-paulus@samba.org>
I've been running with these patches and a FreeBSD guest for a while now
and they work very well, providing big performance improvements in tight
memory situations. Thanks! I did have to patch QEMU to enable the
hypercalls, however -- it might be worth adding them to the default set
since these calls are mandated by PAPR as part of the base hypercall
interface.
-Nathan
On 04/23/15 22:39, Paul Mackerras wrote:
> This adds implementations for the H_CLEAR_REF (test and clear reference
> bit) and H_CLEAR_MOD (test and clear changed bit) hypercalls.
>
> When clearing the reference or change bit in the guest view of the HPTE,
> we also have to clear it in the real HPTE so that we can detect future
> references or changes. When we do so, we transfer the R or C bit value
> to the rmap entry for the underlying host page so that kvm_age_hva_hv(),
> kvm_test_age_hva_hv() and kvmppc_hv_get_dirty_log() know that the page
> has been referenced and/or changed.
>
> These hypercalls are not used by Linux guests and these implementations
> are only compile tested.
>
> Signed-off-by: Paul Mackerras <paulus@samba.org>
> ---
> arch/powerpc/kvm/book3s_hv_rm_mmu.c | 126 ++++++++++++++++++++++++++++++--
> arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 +-
> 2 files changed, 121 insertions(+), 9 deletions(-)
>
> diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> index 24ccc79..479ff7e 100644
> --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
> @@ -109,25 +109,38 @@ void kvmppc_update_rmap_change(unsigned long *rmap, unsigned long psize)
> }
> EXPORT_SYMBOL_GPL(kvmppc_update_rmap_change);
>
> +/* Returns a pointer to the revmap entry for the page mapped by a HPTE */
> +static unsigned long *revmap_for_hpte(struct kvm *kvm, unsigned long hpte_v,
> + unsigned long hpte_gr)
> +{
> + struct kvm_memory_slot *memslot;
> + unsigned long *rmap;
> + unsigned long gfn;
> +
> + gfn = hpte_rpn(hpte_gr, hpte_page_size(hpte_v, hpte_gr));
> + memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
> + if (!memslot)
> + return NULL;
> +
> + rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
> + return rmap;
> +}
> +
> /* Remove this HPTE from the chain for a real page */
> static void remove_revmap_chain(struct kvm *kvm, long pte_index,
> struct revmap_entry *rev,
> unsigned long hpte_v, unsigned long hpte_r)
> {
> struct revmap_entry *next, *prev;
> - unsigned long gfn, ptel, head;
> - struct kvm_memory_slot *memslot;
> + unsigned long ptel, head;
> unsigned long *rmap;
> unsigned long rcbits;
>
> rcbits = hpte_r & (HPTE_R_R | HPTE_R_C);
> ptel = rev->guest_rpte |= rcbits;
> - gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
> - memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
> - if (!memslot)
> + rmap = revmap_for_hpte(kvm, hpte_v, ptel);
> + if (!rmap)
> return;
> -
> - rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
> lock_rmap(rmap);
>
> head = *rmap & KVMPPC_RMAP_INDEX;
> @@ -662,6 +675,105 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
> return H_SUCCESS;
> }
>
> +long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
> + unsigned long pte_index)
> +{
> + struct kvm *kvm = vcpu->kvm;
> + __be64 *hpte;
> + unsigned long v, r, gr;
> + struct revmap_entry *rev;
> + unsigned long *rmap;
> + long ret = H_NOT_FOUND;
> +
> + if (pte_index >= kvm->arch.hpt_npte)
> + return H_PARAMETER;
> +
> + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
> + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
> + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
> + cpu_relax();
> + v = be64_to_cpu(hpte[0]);
> + r = be64_to_cpu(hpte[1]);
> + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
> + goto out;
> +
> + gr = rev->guest_rpte;
> + if (rev->guest_rpte & HPTE_R_R) {
> + rev->guest_rpte &= ~HPTE_R_R;
> + note_hpte_modification(kvm, rev);
> + }
> + if (v & HPTE_V_VALID) {
> + gr |= r & (HPTE_R_R | HPTE_R_C);
> + if (r & HPTE_R_R) {
> + kvmppc_clear_ref_hpte(kvm, hpte, pte_index);
> + rmap = revmap_for_hpte(kvm, v, gr);
> + if (rmap) {
> + lock_rmap(rmap);
> + *rmap |= KVMPPC_RMAP_REFERENCED;
> + unlock_rmap(rmap);
> + }
> + }
> + }
> + vcpu->arch.gpr[4] = gr;
> + ret = H_SUCCESS;
> + out:
> + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
> + return ret;
> +}
> +
> +long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
> + unsigned long pte_index)
> +{
> + struct kvm *kvm = vcpu->kvm;
> + __be64 *hpte;
> + unsigned long v, r, gr;
> + struct revmap_entry *rev;
> + unsigned long *rmap;
> + long ret = H_NOT_FOUND;
> +
> + if (pte_index >= kvm->arch.hpt_npte)
> + return H_PARAMETER;
> +
> + rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
> + hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
> + while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
> + cpu_relax();
> + v = be64_to_cpu(hpte[0]);
> + r = be64_to_cpu(hpte[1]);
> + if (!(v & (HPTE_V_VALID | HPTE_V_ABSENT)))
> + goto out;
> +
> + gr = rev->guest_rpte;
> + if (gr & HPTE_R_C) {
> + rev->guest_rpte &= ~HPTE_R_C;
> + note_hpte_modification(kvm, rev);
> + }
> + if (v & HPTE_V_VALID) {
> + /* need to make it temporarily absent so C is stable */
> + hpte[0] |= cpu_to_be64(HPTE_V_ABSENT);
> + kvmppc_invalidate_hpte(kvm, hpte, pte_index);
> + r = be64_to_cpu(hpte[1]);
> + gr |= r & (HPTE_R_R | HPTE_R_C);
> + if (r & HPTE_R_C) {
> + unsigned long psize = hpte_page_size(v, r);
> + hpte[1] = cpu_to_be64(r & ~HPTE_R_C);
> + eieio();
> + rmap = revmap_for_hpte(kvm, v, gr);
> + if (rmap) {
> + lock_rmap(rmap);
> + *rmap |= KVMPPC_RMAP_CHANGED;
> + kvmppc_update_rmap_change(rmap, psize);
> + unlock_rmap(rmap);
> + }
> + }
> + }
> + vcpu->arch.gpr[4] = gr;
> + ret = H_SUCCESS;
> + out:
> + unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
> + return ret;
> +}
> +
> void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
> unsigned long pte_index)
> {
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index 4d70df2..52752a3 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -1816,8 +1816,8 @@ hcall_real_table:
> .long DOTSYM(kvmppc_h_remove) - hcall_real_table
> .long DOTSYM(kvmppc_h_enter) - hcall_real_table
> .long DOTSYM(kvmppc_h_read) - hcall_real_table
> - .long 0 /* 0x10 - H_CLEAR_MOD */
> - .long 0 /* 0x14 - H_CLEAR_REF */
> + .long DOTSYM(kvmppc_h_clear_mod) - hcall_real_table
> + .long DOTSYM(kvmppc_h_clear_ref) - hcall_real_table
> .long DOTSYM(kvmppc_h_protect) - hcall_real_table
> .long DOTSYM(kvmppc_h_get_tce) - hcall_real_table
> .long DOTSYM(kvmppc_h_put_tce) - hcall_real_table
next prev parent reply other threads:[~2015-07-27 17:31 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-04-24 5:39 [PATCH 0/3] PPC HV bug fixes + hcalls for FreeBSD Paul Mackerras
2015-04-24 5:39 ` Paul Mackerras
2015-04-24 5:39 ` [PATCH 1/3] KVM: PPC: Book3S HV: Fix race in reading change bit when removing HPTE Paul Mackerras
2015-04-24 5:39 ` Paul Mackerras
2015-04-28 5:06 ` Aneesh Kumar K.V
2015-04-28 5:18 ` Aneesh Kumar K.V
2015-04-28 9:10 ` Paul Mackerras
2015-04-28 9:10 ` Paul Mackerras
2015-04-24 5:39 ` [PATCH 2/3] KVM: PPC: Book3S HV: Fix bug in dirty page tracking Paul Mackerras
2015-04-24 5:39 ` Paul Mackerras
2015-04-24 5:39 ` [PATCH 3/3] KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD Paul Mackerras
2015-04-24 5:39 ` Paul Mackerras
2015-07-27 17:31 ` Nathan Whitehorn [this message]
2015-07-27 17:31 ` Nathan Whitehorn
2015-09-06 19:47 ` Nathan Whitehorn
2015-09-06 19:47 ` Nathan Whitehorn
2015-09-06 23:52 ` Paul Mackerras
2015-09-06 23:52 ` Paul Mackerras
2015-09-07 0:46 ` Nathan Whitehorn
2015-09-07 0:46 ` Nathan Whitehorn
2015-09-12 16:47 ` Nathan Whitehorn
2015-09-12 16:47 ` Nathan Whitehorn
2015-09-12 16:51 ` Alexander Graf
2015-09-12 16:51 ` Alexander Graf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=55B66B08.5070202@freebsd.org \
--to=nwhitehorn@freebsd.org \
--cc=kvm-ppc@vger.kernel.org \
--cc=kvm@vger.kernel.org \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.