From: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
To: Paul Mackerras <paulus@ozlabs.org>,
kvm-ppc@vger.kernel.org, kvm@vger.kernel.org
Subject: Re: [PATCH] KVM: PPC: Book3S HV: Fix migration and HPT resizing of HPT guests on radix hosts
Date: Wed, 22 Nov 2017 05:55:09 +0000 [thread overview]
Message-ID: <1511330109.5166.0.camel@gmail.com> (raw)
In-Reply-To: <20171122034152.GA30117@fergus.ozlabs.ibm.com>
On Wed, 2017-11-22 at 14:41 +1100, Paul Mackerras wrote:
> From a038eb6fa13cc49cc91e6e81409c230ef8086038 Mon Sep 17 00:00:00
> 2001
> From: Paul Mackerras <paulus@ozlabs.org>
> Date: Wed, 22 Nov 2017 14:38:53 +1100
> Subject:
>
> This fixes two errors that prevent a guest using the HPT MMU from
> successfully migrating to a POWER9 host in radix MMU mode, or
> resizing
> its HPT when running on a radix host.
>
> The first bug was that commit 8dc6cca556e4 ("KVM: PPC: Book3S HV:
> Don't rely on host's page size information", 2017-09-11) missed two
> uses of hpte_base_page_size(), one in the HPT rehashing code and
> one in kvm_htab_write() (which is used on the destination side in
> migrating a HPT guest). Instead we use
> kvmppc_hpte_base_page_shift().
> Having the shift count means that we can use left and right shifts
> instead of multiplication and division in a few places.
>
> Along the way, this adds a check in kvm_htab_write() to ensure that
> the
> page size encoding in the incoming HPTEs is recognized, and if not
> return an EINVAL error to userspace.
>
> The second bug was that kvm_htab_write was performing some but not
> all
> of the functions of kvmhv_setup_mmu(), resulting in the destination
> VM
> being left in radix mode as far as the hardware is concerned. The
> simplest fix for now is make kvm_htab_write() call
> kvmppc_setup_partition_table() like kvmppc_hv_setup_htab_rma() does.
> In future it would be better to refactor the code more extensively
> to remove the duplication.
>
> Fixes: 8dc6cca556e4 ("KVM: PPC: Book3S HV: Don't rely on host's page
> size information")
> Fixes: 7a84084c6054 ("KVM: PPC: Book3S HV: Set partition table rather
> than SDR1 on POWER9")
> Reported-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Tested-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> ---
> arch/powerpc/include/asm/kvm_ppc.h | 1 +
> arch/powerpc/kvm/book3s_64_mmu_hv.c | 37 +++++++++++++++++++++++--
> ------------
> arch/powerpc/kvm/book3s_hv.c | 3 +--
> 3 files changed, 25 insertions(+), 16 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> b/arch/powerpc/include/asm/kvm_ppc.h
> index 96753f3..941c2a3 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -180,6 +180,7 @@ extern void
> kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
> struct iommu_group *grp);
> extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
> extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
> +extern void kvmppc_setup_partition_table(struct kvm *kvm);
>
> extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
> struct kvm_create_spapr_tce_64
> *args);
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 235319c..9660972 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -1238,8 +1238,9 @@ static unsigned long
> resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
> unsigned long vpte, rpte, guest_rpte;
> int ret;
> struct revmap_entry *rev;
> - unsigned long apsize, psize, avpn, pteg, hash;
> + unsigned long apsize, avpn, pteg, hash;
> unsigned long new_idx, new_pteg, replace_vpte;
> + int pshift;
>
> hptep = (__be64 *)(old->virt + (idx << 4));
>
> @@ -1298,8 +1299,8 @@ static unsigned long
> resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
> goto out;
>
> rpte = be64_to_cpu(hptep[1]);
> - psize = hpte_base_page_size(vpte, rpte);
> - avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
> + pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
> + avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >>
> 23);
> pteg = idx / HPTES_PER_GROUP;
> if (vpte & HPTE_V_SECONDARY)
> pteg = ~pteg;
> @@ -1311,20 +1312,20 @@ static unsigned long
> resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
> offset = (avpn & 0x1f) << 23;
> vsid = avpn >> 5;
> /* We can find more bits from the pteg value */
> - if (psize < (1ULL << 23))
> - offset |= ((vsid ^ pteg) & old_hash_mask) *
> psize;
> + if (pshift < 23)
> + offset |= ((vsid ^ pteg) & old_hash_mask) <<
> pshift;
>
> - hash = vsid ^ (offset / psize);
> + hash = vsid ^ (offset >> pshift);
> } else {
> unsigned long offset, vsid;
>
> /* We only have 40 - 23 bits of seg_off in avpn */
> offset = (avpn & 0x1ffff) << 23;
> vsid = avpn >> 17;
> - if (psize < (1ULL << 23))
> - offset |= ((vsid ^ (vsid << 25) ^ pteg) &
> old_hash_mask) * psize;
> + if (pshift < 23)
> + offset |= ((vsid ^ (vsid << 25) ^ pteg) &
> old_hash_mask) << pshift;
>
> - hash = vsid ^ (vsid << 25) ^ (offset / psize);
> + hash = vsid ^ (vsid << 25) ^ (offset >> pshift);
> }
>
> new_pteg = hash & new_hash_mask;
> @@ -1801,6 +1802,7 @@ static ssize_t kvm_htab_write(struct file
> *file, const char __user *buf,
> ssize_t nb;
> long int err, ret;
> int mmu_ready;
> + int pshift;
>
> if (!access_ok(VERIFY_READ, buf, count))
> return -EFAULT;
> @@ -1855,6 +1857,9 @@ static ssize_t kvm_htab_write(struct file
> *file, const char __user *buf,
> err = -EINVAL;
> if (!(v & HPTE_V_VALID))
> goto out;
> + pshift = kvmppc_hpte_base_page_shift(v, r);
> + if (pshift <= 0)
> + goto out;
> lbuf += 2;
> nb += HPTE_SIZE;
>
> @@ -1869,14 +1874,18 @@ static ssize_t kvm_htab_write(struct file
> *file, const char __user *buf,
> goto out;
> }
> if (!mmu_ready && is_vrma_hpte(v)) {
> - unsigned long psize > hpte_base_page_size(v, r);
> - unsigned long senc > slb_pgsize_encoding(psize);
> - unsigned long lpcr;
> + unsigned long senc, lpcr;
>
> + senc = slb_pgsize_encoding(1ul <<
> pshift);
> kvm->arch.vrma_slb_v = senc |
> SLB_VSID_B_1T |
> (VRMA_VSID <<
> SLB_VSID_SHIFT_1T);
> - lpcr = senc << (LPCR_VRMASD_SH - 4);
> - kvmppc_update_lpcr(kvm, lpcr,
> LPCR_VRMASD);
> + if
> (!cpu_has_feature(CPU_FTR_ARCH_300)) {
> + lpcr = senc <<
> (LPCR_VRMASD_SH - 4);
> + kvmppc_update_lpcr(kvm,
> lpcr,
> + LPCR_VRMA
> SD);
> + } else {
> + kvmppc_setup_partition_table
> (kvm);
> + }
> mmu_ready = 1;
> }
> ++i;
> diff --git a/arch/powerpc/kvm/book3s_hv.c
> b/arch/powerpc/kvm/book3s_hv.c
> index 79ea3d9..2d46037 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI
> wakeup to a free host core");
>
> static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
> static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
> -static void kvmppc_setup_partition_table(struct kvm *kvm);
>
> static inline struct kvm_vcpu *next_runnable_thread(struct
> kvmppc_vcore *vc,
> int *ip)
> @@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct
> kvm_vcpu *vcpu)
> return;
> }
>
> -static void kvmppc_setup_partition_table(struct kvm *kvm)
> +void kvmppc_setup_partition_table(struct kvm *kvm)
> {
> unsigned long dw0, dw1;
>
WARNING: multiple messages have this Message-ID (diff)
From: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
To: Paul Mackerras <paulus@ozlabs.org>,
kvm-ppc@vger.kernel.org, kvm@vger.kernel.org
Subject: Re: [PATCH] KVM: PPC: Book3S HV: Fix migration and HPT resizing of HPT guests on radix hosts
Date: Wed, 22 Nov 2017 16:55:09 +1100 [thread overview]
Message-ID: <1511330109.5166.0.camel@gmail.com> (raw)
In-Reply-To: <20171122034152.GA30117@fergus.ozlabs.ibm.com>
On Wed, 2017-11-22 at 14:41 +1100, Paul Mackerras wrote:
> From a038eb6fa13cc49cc91e6e81409c230ef8086038 Mon Sep 17 00:00:00
> 2001
> From: Paul Mackerras <paulus@ozlabs.org>
> Date: Wed, 22 Nov 2017 14:38:53 +1100
> Subject:
>
> This fixes two errors that prevent a guest using the HPT MMU from
> successfully migrating to a POWER9 host in radix MMU mode, or
> resizing
> its HPT when running on a radix host.
>
> The first bug was that commit 8dc6cca556e4 ("KVM: PPC: Book3S HV:
> Don't rely on host's page size information", 2017-09-11) missed two
> uses of hpte_base_page_size(), one in the HPT rehashing code and
> one in kvm_htab_write() (which is used on the destination side in
> migrating a HPT guest). Instead we use
> kvmppc_hpte_base_page_shift().
> Having the shift count means that we can use left and right shifts
> instead of multiplication and division in a few places.
>
> Along the way, this adds a check in kvm_htab_write() to ensure that
> the
> page size encoding in the incoming HPTEs is recognized, and if not
> return an EINVAL error to userspace.
>
> The second bug was that kvm_htab_write was performing some but not
> all
> of the functions of kvmhv_setup_mmu(), resulting in the destination
> VM
> being left in radix mode as far as the hardware is concerned. The
> simplest fix for now is make kvm_htab_write() call
> kvmppc_setup_partition_table() like kvmppc_hv_setup_htab_rma() does.
> In future it would be better to refactor the code more extensively
> to remove the duplication.
>
> Fixes: 8dc6cca556e4 ("KVM: PPC: Book3S HV: Don't rely on host's page
> size information")
> Fixes: 7a84084c6054 ("KVM: PPC: Book3S HV: Set partition table rather
> than SDR1 on POWER9")
> Reported-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Tested-by: Suraj Jitindar Singh <sjitindarsingh@gmail.com>
> ---
> arch/powerpc/include/asm/kvm_ppc.h | 1 +
> arch/powerpc/kvm/book3s_64_mmu_hv.c | 37 +++++++++++++++++++++++--
> ------------
> arch/powerpc/kvm/book3s_hv.c | 3 +--
> 3 files changed, 25 insertions(+), 16 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> b/arch/powerpc/include/asm/kvm_ppc.h
> index 96753f3..941c2a3 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -180,6 +180,7 @@ extern void
> kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
> struct iommu_group *grp);
> extern int kvmppc_switch_mmu_to_hpt(struct kvm *kvm);
> extern int kvmppc_switch_mmu_to_radix(struct kvm *kvm);
> +extern void kvmppc_setup_partition_table(struct kvm *kvm);
>
> extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
> struct kvm_create_spapr_tce_64
> *args);
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> index 235319c..9660972 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
> @@ -1238,8 +1238,9 @@ static unsigned long
> resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
> unsigned long vpte, rpte, guest_rpte;
> int ret;
> struct revmap_entry *rev;
> - unsigned long apsize, psize, avpn, pteg, hash;
> + unsigned long apsize, avpn, pteg, hash;
> unsigned long new_idx, new_pteg, replace_vpte;
> + int pshift;
>
> hptep = (__be64 *)(old->virt + (idx << 4));
>
> @@ -1298,8 +1299,8 @@ static unsigned long
> resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
> goto out;
>
> rpte = be64_to_cpu(hptep[1]);
> - psize = hpte_base_page_size(vpte, rpte);
> - avpn = HPTE_V_AVPN_VAL(vpte) & ~((psize - 1) >> 23);
> + pshift = kvmppc_hpte_base_page_shift(vpte, rpte);
> + avpn = HPTE_V_AVPN_VAL(vpte) & ~(((1ul << pshift) - 1) >>
> 23);
> pteg = idx / HPTES_PER_GROUP;
> if (vpte & HPTE_V_SECONDARY)
> pteg = ~pteg;
> @@ -1311,20 +1312,20 @@ static unsigned long
> resize_hpt_rehash_hpte(struct kvm_resize_hpt *resize,
> offset = (avpn & 0x1f) << 23;
> vsid = avpn >> 5;
> /* We can find more bits from the pteg value */
> - if (psize < (1ULL << 23))
> - offset |= ((vsid ^ pteg) & old_hash_mask) *
> psize;
> + if (pshift < 23)
> + offset |= ((vsid ^ pteg) & old_hash_mask) <<
> pshift;
>
> - hash = vsid ^ (offset / psize);
> + hash = vsid ^ (offset >> pshift);
> } else {
> unsigned long offset, vsid;
>
> /* We only have 40 - 23 bits of seg_off in avpn */
> offset = (avpn & 0x1ffff) << 23;
> vsid = avpn >> 17;
> - if (psize < (1ULL << 23))
> - offset |= ((vsid ^ (vsid << 25) ^ pteg) &
> old_hash_mask) * psize;
> + if (pshift < 23)
> + offset |= ((vsid ^ (vsid << 25) ^ pteg) &
> old_hash_mask) << pshift;
>
> - hash = vsid ^ (vsid << 25) ^ (offset / psize);
> + hash = vsid ^ (vsid << 25) ^ (offset >> pshift);
> }
>
> new_pteg = hash & new_hash_mask;
> @@ -1801,6 +1802,7 @@ static ssize_t kvm_htab_write(struct file
> *file, const char __user *buf,
> ssize_t nb;
> long int err, ret;
> int mmu_ready;
> + int pshift;
>
> if (!access_ok(VERIFY_READ, buf, count))
> return -EFAULT;
> @@ -1855,6 +1857,9 @@ static ssize_t kvm_htab_write(struct file
> *file, const char __user *buf,
> err = -EINVAL;
> if (!(v & HPTE_V_VALID))
> goto out;
> + pshift = kvmppc_hpte_base_page_shift(v, r);
> + if (pshift <= 0)
> + goto out;
> lbuf += 2;
> nb += HPTE_SIZE;
>
> @@ -1869,14 +1874,18 @@ static ssize_t kvm_htab_write(struct file
> *file, const char __user *buf,
> goto out;
> }
> if (!mmu_ready && is_vrma_hpte(v)) {
> - unsigned long psize =
> hpte_base_page_size(v, r);
> - unsigned long senc =
> slb_pgsize_encoding(psize);
> - unsigned long lpcr;
> + unsigned long senc, lpcr;
>
> + senc = slb_pgsize_encoding(1ul <<
> pshift);
> kvm->arch.vrma_slb_v = senc |
> SLB_VSID_B_1T |
> (VRMA_VSID <<
> SLB_VSID_SHIFT_1T);
> - lpcr = senc << (LPCR_VRMASD_SH - 4);
> - kvmppc_update_lpcr(kvm, lpcr,
> LPCR_VRMASD);
> + if
> (!cpu_has_feature(CPU_FTR_ARCH_300)) {
> + lpcr = senc <<
> (LPCR_VRMASD_SH - 4);
> + kvmppc_update_lpcr(kvm,
> lpcr,
> + LPCR_VRMA
> SD);
> + } else {
> + kvmppc_setup_partition_table
> (kvm);
> + }
> mmu_ready = 1;
> }
> ++i;
> diff --git a/arch/powerpc/kvm/book3s_hv.c
> b/arch/powerpc/kvm/book3s_hv.c
> index 79ea3d9..2d46037 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -120,7 +120,6 @@ MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI
> wakeup to a free host core");
>
> static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
> static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
> -static void kvmppc_setup_partition_table(struct kvm *kvm);
>
> static inline struct kvm_vcpu *next_runnable_thread(struct
> kvmppc_vcore *vc,
> int *ip)
> @@ -3574,7 +3573,7 @@ static void kvmppc_mmu_destroy_hv(struct
> kvm_vcpu *vcpu)
> return;
> }
>
> -static void kvmppc_setup_partition_table(struct kvm *kvm)
> +void kvmppc_setup_partition_table(struct kvm *kvm)
> {
> unsigned long dw0, dw1;
>
next prev parent reply other threads:[~2017-11-22 5:55 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-11-22 3:41 [PATCH] KVM: PPC: Book3S HV: Fix migration and HPT resizing of HPT guests on radix hosts Paul Mackerras
2017-11-22 3:41 ` Paul Mackerras
2017-11-22 5:55 ` Suraj Jitindar Singh [this message]
2017-11-22 5:55 ` Suraj Jitindar Singh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1511330109.5166.0.camel@gmail.com \
--to=sjitindarsingh@gmail.com \
--cc=kvm-ppc@vger.kernel.org \
--cc=kvm@vger.kernel.org \
--cc=paulus@ozlabs.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.