From: Paul Mackerras <paulus@samba.org>
To: Alexander Graf <agraf@suse.de>
Cc: linuxppc-dev@ozlabs.org, kvm@vger.kernel.org, kvm-ppc@vger.kernel.org
Subject: [PATCH 07/13] KVM: PPC: Allow use of small pages to back Book3S HV
Date: Tue, 06 Dec 2011 06:09:09 +0000 [thread overview]
Message-ID: <20111206060908.GK12389@drongo> (raw)
In-Reply-To: <20111206060156.GD12389@drongo>
This relaxes the requirement that the guest memory be provided as
16MB huge pages, allowing it to be provided as normal memory, i.e.
in pages of PAGE_SIZE bytes (4k or 64k). To allow this, we index
the kvm->arch.slot_phys[] arrays with a small page index, even if
huge pages are being used, and use the low-order 5 bits of each
entry to store the order of the enclosing page with respect to
normal pages, i.e. log_2(enclosing_page_size / PAGE_SIZE).
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
arch/powerpc/include/asm/kvm_book3s_64.h | 8 ++
arch/powerpc/include/asm/kvm_host.h | 3 +-
arch/powerpc/include/asm/kvm_ppc.h | 2 +-
arch/powerpc/include/asm/reg.h | 1 +
arch/powerpc/kvm/book3s_64_mmu_hv.c | 122 ++++++++++++++++++++----------
arch/powerpc/kvm/book3s_hv.c | 57 ++++++++------
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 +-
7 files changed, 130 insertions(+), 69 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index ab6772e..d55e6b4 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -107,4 +107,12 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
return 0; /* error */
}
+static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
+ unsigned long pagesize)
+{
+ unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;
+
+ return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
+}
+
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 2a52bdb..ba1da85 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -176,14 +176,13 @@ struct revmap_entry {
};
/* Low-order bits in kvm->arch.slot_phys[][] */
+#define KVMPPC_PAGE_ORDER_MASK 0x1f
#define KVMPPC_GOT_PAGE 0x80
struct kvm_arch {
#ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
struct revmap_entry *revmap;
- unsigned long ram_psize;
- unsigned long ram_porder;
unsigned int lpid;
unsigned int host_lpid;
unsigned long host_lpcr;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 111e1b4..a61b5b5 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -122,7 +122,7 @@ extern void kvmppc_free_hpt(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
- struct kvm_memory_slot *memslot);
+ struct kvm_memory_slot *memslot, unsigned long porder);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 559da19..4599d12 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -237,6 +237,7 @@
#define LPCR_ISL (1ul << (63-2))
#define LPCR_VC_SH (63-2)
#define LPCR_DPFD_SH (63-11)
+#define LPCR_VRMASD (0x1ful << (63-16))
#define LPCR_VRMA_L (1ul << (63-12))
#define LPCR_VRMA_LP0 (1ul << (63-15))
#define LPCR_VRMA_LP1 (1ul << (63-16))
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 87016cc..cc18f3d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -34,8 +34,6 @@
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
-/* Pages in the VRMA are 16MB pages */
-#define VRMA_PAGE_ORDER 24
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
@@ -95,17 +93,31 @@ void kvmppc_free_hpt(struct kvm *kvm)
free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
}
-void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
+/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
+{
+ return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
+}
+
+/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
+{
+ return (pgsize = 0x10000) ? 0x1000 : 0;
+}
+
+void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
+ unsigned long porder)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long i;
unsigned long npages;
unsigned long hp_v, hp_r;
unsigned long addr, hash;
- unsigned long porder = kvm->arch.ram_porder;
+ unsigned long psize;
+ unsigned long hp0, hp1;
long ret;
- npages = kvm->arch.slot_npages[memslot->id];
+ psize = 1ul << porder;
+ npages = memslot->npages >> (porder - PAGE_SHIFT);
/* VRMA can't be > 1TB */
if (npages > 1ul << (40 - porder))
@@ -114,6 +126,11 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
if (npages > HPT_NPTEG)
npages = HPT_NPTEG;
+ hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
+ HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
+ hp1 = hpte1_pgsize_encoding(psize) |
+ HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
+
for (i = 0; i < npages; ++i) {
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
@@ -125,10 +142,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
* is available and use it.
*/
hash = (hash << 3) + 7;
- hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
- (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
- HPTE_V_LARGE | HPTE_V_VALID;
- hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
+ hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
+ hp_r = hp1 | addr;
ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
if (ret != H_SUCCESS) {
pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
@@ -176,22 +191,25 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
* one already in the kvm->arch.slot_phys[][] arrays.
*/
static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
- struct kvm_memory_slot *memslot)
+ struct kvm_memory_slot *memslot,
+ unsigned long psize)
{
unsigned long start;
- long np;
- struct page *page, *pages[1];
+ long np, err;
+ struct page *page, *hpage, *pages[1];
+ unsigned long s, pgsize;
unsigned long *physp;
- unsigned long pfn, i;
+ unsigned int got, pgorder;
+ unsigned long pfn, i, npages;
physp = kvm->arch.slot_phys[memslot->id];
if (!physp)
return -EINVAL;
- i = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT);
- if (physp[i])
+ if (physp[gfn - memslot->base_gfn])
return 0;
page = NULL;
+ pgsize = psize;
start = gfn_to_hva_memslot(memslot, gfn);
/* Instantiate and get the page we want access to */
@@ -199,25 +217,46 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
if (np != 1)
return -EINVAL;
page = pages[0];
-
- /* Check it's a 16MB page */
- if (!PageHead(page) ||
- compound_order(page) != (kvm->arch.ram_porder - PAGE_SHIFT)) {
- pr_err("page at %lx isn't 16MB (o=%d)\n",
- start, compound_order(page));
- put_page(page);
- return -EINVAL;
+ got = KVMPPC_GOT_PAGE;
+
+ /* See if this is a large page */
+ s = PAGE_SIZE;
+ if (PageHuge(page)) {
+ hpage = compound_head(page);
+ s <<= compound_order(hpage);
+ /* Get the whole large page if slot alignment is ok */
+ if (s > psize && slot_is_aligned(memslot, s) &&
+ !(memslot->userspace_addr & (s - 1))) {
+ start &= ~(s - 1);
+ pgsize = s;
+ page = hpage;
+ }
}
+ err = -EINVAL;
+ if (s < psize)
+ goto out;
pfn = page_to_pfn(page);
+ npages = pgsize >> PAGE_SHIFT;
+ pgorder = __ilog2(npages);
+ physp += (gfn - memslot->base_gfn) & ~(npages - 1);
spin_lock(&kvm->arch.slot_phys_lock);
- if (!physp[i])
- physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
- else
- put_page(page);
+ for (i = 0; i < npages; ++i) {
+ if (!physp[i]) {
+ physp[i] = ((pfn + i) << PAGE_SHIFT) + got + pgorder;
+ got = 0;
+ }
+ }
spin_unlock(&kvm->arch.slot_phys_lock);
+ err = 0;
- return 0;
+ out:
+ if (got) {
+ if (PageHuge(page))
+ page = compound_head(page);
+ put_page(page);
+ }
+ return err;
}
/*
@@ -242,7 +281,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
memslot = gfn_to_memslot(kvm, gfn);
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
return H_PARAMETER;
- if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
+ if (!slot_is_aligned(memslot, psize))
+ return H_PARAMETER;
+ if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
return H_PARAMETER;
preempt_disable();
@@ -269,8 +310,8 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
struct kvm_memory_slot *memslot;
unsigned long gfn = gpa >> PAGE_SHIFT;
struct page *page;
- unsigned long offset;
- unsigned long pfn, pa;
+ unsigned long psize, offset;
+ unsigned long pa;
unsigned long *physp;
memslot = gfn_to_memslot(kvm, gfn);
@@ -279,20 +320,23 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
physp = kvm->arch.slot_phys[memslot->id];
if (!physp)
return NULL;
- physp += (gfn - memslot->base_gfn) >>
- (kvm->arch.ram_porder - PAGE_SHIFT);
+ physp += gfn - memslot->base_gfn;
pa = *physp;
if (!pa) {
- if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
+ if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0)
return NULL;
pa = *physp;
}
- pfn = pa >> PAGE_SHIFT;
- page = pfn_to_page(pfn);
+ page = pfn_to_page(pa >> PAGE_SHIFT);
+ psize = PAGE_SIZE;
+ if (PageHuge(page)) {
+ page = compound_head(page);
+ psize <<= compound_order(page);
+ }
get_page(page);
- offset = gpa & (kvm->arch.ram_psize - 1);
+ offset = gpa & (psize - 1);
if (nb_ret)
- *nb_ret = kvm->arch.ram_psize - offset;
+ *nb_ret = psize - offset;
return page_address(page) + offset;
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 5d3590c..150f527 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -49,8 +49,6 @@
#include <linux/highmem.h>
#include <linux/hugetlb.h>
-#define LARGE_PAGE_ORDER 24 /* 16MB pages */
-
/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */
@@ -1105,24 +1103,26 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
return fd;
}
+static unsigned long slb_pgsize_encoding(unsigned long psize)
+{
+ unsigned long senc = 0;
+
+ if (psize > 0x1000) {
+ senc = SLB_VSID_L;
+ if (psize = 0x10000)
+ senc |= SLB_VSID_LP_01;
+ }
+ return senc;
+}
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
- unsigned long psize;
unsigned long npages;
unsigned long *phys;
- /* For now, only allow 16MB-aligned slots */
- psize = kvm->arch.ram_psize;
- if ((mem->memory_size & (psize - 1)) ||
- (mem->guest_phys_addr & (psize - 1))) {
- pr_err("bad memory_size=%llx @ %llx\n",
- mem->memory_size, mem->guest_phys_addr);
- return -EINVAL;
- }
-
/* Allocate a slot_phys array */
- npages = mem->memory_size >> kvm->arch.ram_porder;
+ npages = mem->memory_size >> PAGE_SHIFT;
phys = kvm->arch.slot_phys[mem->slot];
if (!phys) {
phys = vzalloc(npages * sizeof(unsigned long));
@@ -1150,6 +1150,8 @@ static void unpin_slot(struct kvm *kvm, int slot_id)
continue;
pfn = physp[j] >> PAGE_SHIFT;
page = pfn_to_page(pfn);
+ if (PageHuge(page))
+ page = compound_head(page);
SetPageDirty(page);
put_page(page);
}
@@ -1172,12 +1174,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
unsigned long hva;
struct kvm_memory_slot *memslot;
struct vm_area_struct *vma;
- unsigned long lpcr;
+ unsigned long lpcr, senc;
unsigned long psize, porder;
unsigned long rma_size;
unsigned long rmls;
unsigned long *physp;
- unsigned long i, npages, pa;
+ unsigned long i, npages;
mutex_lock(&kvm->lock);
if (kvm->arch.rma_setup_done)
@@ -1199,8 +1201,7 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
goto up_out;
psize = vma_kernel_pagesize(vma);
- if (psize != kvm->arch.ram_psize)
- goto up_out;
+ porder = __ilog2(psize);
/* Is this one of our preallocated RMAs? */
if (vma->vm_file && vma->vm_file->f_op = &kvm_rma_fops &&
@@ -1217,13 +1218,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
goto out;
}
+ /* We can handle 4k, 64k or 16M pages in the VRMA */
+ err = -EINVAL;
+ if (!(psize = 0x1000 || psize = 0x10000 ||
+ psize = 0x1000000))
+ goto out;
+
/* Update VRMASD field in the LPCR */
- lpcr = kvm->arch.lpcr & ~(0x1fUL << LPCR_VRMASD_SH);
- lpcr |= LPCR_VRMA_L;
+ senc = slb_pgsize_encoding(psize);
+ lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
+ lpcr |= senc << (LPCR_VRMASD_SH - 4);
kvm->arch.lpcr = lpcr;
/* Create HPTEs in the hash page table for the VRMA */
- kvmppc_map_vrma(vcpu, memslot);
+ kvmppc_map_vrma(vcpu, memslot, porder);
} else {
/* Set up to use an RMO region */
@@ -1262,13 +1270,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
/* Initialize phys addrs of pages in RMO */
- porder = kvm->arch.ram_porder;
- npages = rma_size >> porder;
- pa = ri->base_pfn << PAGE_SHIFT;
+ npages = ri->npages;
+ porder = __ilog2(npages);
physp = kvm->arch.slot_phys[memslot->id];
spin_lock(&kvm->arch.slot_phys_lock);
for (i = 0; i < npages; ++i)
- physp[i] = pa + (i << porder);
+ physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder;
spin_unlock(&kvm->arch.slot_phys_lock);
}
@@ -1297,8 +1304,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
- kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
- kvm->arch.ram_porder = LARGE_PAGE_ORDER;
kvm->arch.rma = NULL;
kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index d2eb8ac..c76305c 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -80,6 +80,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
memslot = builtin_gfn_to_memslot(kvm, gfn);
if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)))
return H_PARAMETER;
+
+ /* Check if the requested page fits entirely in the memslot. */
+ if (!slot_is_aligned(memslot, psize))
+ return H_PARAMETER;
slot_fn = gfn - memslot->base_gfn;
physp = kvm->arch.slot_phys[memslot->id];
@@ -91,9 +95,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
pa = *physp;
if (!pa)
return H_TOO_HARD;
+ pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
pa &= PAGE_MASK;
- pte_size = kvm->arch.ram_psize;
if (pte_size < psize)
return H_PARAMETER;
if (pa && pte_size > psize)
--
1.7.5.4
WARNING: multiple messages have this Message-ID (diff)
From: Paul Mackerras <paulus@samba.org>
To: Alexander Graf <agraf@suse.de>
Cc: linuxppc-dev@ozlabs.org, kvm@vger.kernel.org, kvm-ppc@vger.kernel.org
Subject: [PATCH 07/13] KVM: PPC: Allow use of small pages to back Book3S HV guests
Date: Tue, 6 Dec 2011 17:09:09 +1100 [thread overview]
Message-ID: <20111206060908.GK12389@drongo> (raw)
In-Reply-To: <20111206060156.GD12389@drongo>
This relaxes the requirement that the guest memory be provided as
16MB huge pages, allowing it to be provided as normal memory, i.e.
in pages of PAGE_SIZE bytes (4k or 64k). To allow this, we index
the kvm->arch.slot_phys[] arrays with a small page index, even if
huge pages are being used, and use the low-order 5 bits of each
entry to store the order of the enclosing page with respect to
normal pages, i.e. log_2(enclosing_page_size / PAGE_SIZE).
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
arch/powerpc/include/asm/kvm_book3s_64.h | 8 ++
arch/powerpc/include/asm/kvm_host.h | 3 +-
arch/powerpc/include/asm/kvm_ppc.h | 2 +-
arch/powerpc/include/asm/reg.h | 1 +
arch/powerpc/kvm/book3s_64_mmu_hv.c | 122 ++++++++++++++++++++----------
arch/powerpc/kvm/book3s_hv.c | 57 ++++++++------
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 +-
7 files changed, 130 insertions(+), 69 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index ab6772e..d55e6b4 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -107,4 +107,12 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
return 0; /* error */
}
+static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
+ unsigned long pagesize)
+{
+ unsigned long mask = (pagesize >> PAGE_SHIFT) - 1;
+
+ return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
+}
+
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 2a52bdb..ba1da85 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -176,14 +176,13 @@ struct revmap_entry {
};
/* Low-order bits in kvm->arch.slot_phys[][] */
+#define KVMPPC_PAGE_ORDER_MASK 0x1f
#define KVMPPC_GOT_PAGE 0x80
struct kvm_arch {
#ifdef CONFIG_KVM_BOOK3S_64_HV
unsigned long hpt_virt;
struct revmap_entry *revmap;
- unsigned long ram_psize;
- unsigned long ram_porder;
unsigned int lpid;
unsigned int host_lpid;
unsigned long host_lpcr;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 111e1b4..a61b5b5 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -122,7 +122,7 @@ extern void kvmppc_free_hpt(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
- struct kvm_memory_slot *memslot);
+ struct kvm_memory_slot *memslot, unsigned long porder);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce *args);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 559da19..4599d12 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -237,6 +237,7 @@
#define LPCR_ISL (1ul << (63-2))
#define LPCR_VC_SH (63-2)
#define LPCR_DPFD_SH (63-11)
+#define LPCR_VRMASD (0x1ful << (63-16))
#define LPCR_VRMA_L (1ul << (63-12))
#define LPCR_VRMA_LP0 (1ul << (63-15))
#define LPCR_VRMA_LP1 (1ul << (63-16))
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 87016cc..cc18f3d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -34,8 +34,6 @@
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
-/* Pages in the VRMA are 16MB pages */
-#define VRMA_PAGE_ORDER 24
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
@@ -95,17 +93,31 @@ void kvmppc_free_hpt(struct kvm *kvm)
free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
}
-void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
+/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize)
+{
+ return (pgsize > 0x1000) ? HPTE_V_LARGE : 0;
+}
+
+/* Bits in second HPTE dword for pagesize 4k, 64k or 16M */
+static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize)
+{
+ return (pgsize == 0x10000) ? 0x1000 : 0;
+}
+
+void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
+ unsigned long porder)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long i;
unsigned long npages;
unsigned long hp_v, hp_r;
unsigned long addr, hash;
- unsigned long porder = kvm->arch.ram_porder;
+ unsigned long psize;
+ unsigned long hp0, hp1;
long ret;
- npages = kvm->arch.slot_npages[memslot->id];
+ psize = 1ul << porder;
+ npages = memslot->npages >> (porder - PAGE_SHIFT);
/* VRMA can't be > 1TB */
if (npages > 1ul << (40 - porder))
@@ -114,6 +126,11 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
if (npages > HPT_NPTEG)
npages = HPT_NPTEG;
+ hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
+ HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
+ hp1 = hpte1_pgsize_encoding(psize) |
+ HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
+
for (i = 0; i < npages; ++i) {
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
@@ -125,10 +142,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot)
* is available and use it.
*/
hash = (hash << 3) + 7;
- hp_v = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
- (i << (VRMA_PAGE_ORDER - 16)) | HPTE_V_BOLTED |
- HPTE_V_LARGE | HPTE_V_VALID;
- hp_r = addr | HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX;
+ hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
+ hp_r = hp1 | addr;
ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
if (ret != H_SUCCESS) {
pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
@@ -176,22 +191,25 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
* one already in the kvm->arch.slot_phys[][] arrays.
*/
static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
- struct kvm_memory_slot *memslot)
+ struct kvm_memory_slot *memslot,
+ unsigned long psize)
{
unsigned long start;
- long np;
- struct page *page, *pages[1];
+ long np, err;
+ struct page *page, *hpage, *pages[1];
+ unsigned long s, pgsize;
unsigned long *physp;
- unsigned long pfn, i;
+ unsigned int got, pgorder;
+ unsigned long pfn, i, npages;
physp = kvm->arch.slot_phys[memslot->id];
if (!physp)
return -EINVAL;
- i = (gfn - memslot->base_gfn) >> (kvm->arch.ram_porder - PAGE_SHIFT);
- if (physp[i])
+ if (physp[gfn - memslot->base_gfn])
return 0;
page = NULL;
+ pgsize = psize;
start = gfn_to_hva_memslot(memslot, gfn);
/* Instantiate and get the page we want access to */
@@ -199,25 +217,46 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
if (np != 1)
return -EINVAL;
page = pages[0];
-
- /* Check it's a 16MB page */
- if (!PageHead(page) ||
- compound_order(page) != (kvm->arch.ram_porder - PAGE_SHIFT)) {
- pr_err("page at %lx isn't 16MB (o=%d)\n",
- start, compound_order(page));
- put_page(page);
- return -EINVAL;
+ got = KVMPPC_GOT_PAGE;
+
+ /* See if this is a large page */
+ s = PAGE_SIZE;
+ if (PageHuge(page)) {
+ hpage = compound_head(page);
+ s <<= compound_order(hpage);
+ /* Get the whole large page if slot alignment is ok */
+ if (s > psize && slot_is_aligned(memslot, s) &&
+ !(memslot->userspace_addr & (s - 1))) {
+ start &= ~(s - 1);
+ pgsize = s;
+ page = hpage;
+ }
}
+ err = -EINVAL;
+ if (s < psize)
+ goto out;
pfn = page_to_pfn(page);
+ npages = pgsize >> PAGE_SHIFT;
+ pgorder = __ilog2(npages);
+ physp += (gfn - memslot->base_gfn) & ~(npages - 1);
spin_lock(&kvm->arch.slot_phys_lock);
- if (!physp[i])
- physp[i] = (pfn << PAGE_SHIFT) | KVMPPC_GOT_PAGE;
- else
- put_page(page);
+ for (i = 0; i < npages; ++i) {
+ if (!physp[i]) {
+ physp[i] = ((pfn + i) << PAGE_SHIFT) + got + pgorder;
+ got = 0;
+ }
+ }
spin_unlock(&kvm->arch.slot_phys_lock);
+ err = 0;
- return 0;
+ out:
+ if (got) {
+ if (PageHuge(page))
+ page = compound_head(page);
+ put_page(page);
+ }
+ return err;
}
/*
@@ -242,7 +281,9 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
memslot = gfn_to_memslot(kvm, gfn);
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
return H_PARAMETER;
- if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
+ if (!slot_is_aligned(memslot, psize))
+ return H_PARAMETER;
+ if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
return H_PARAMETER;
preempt_disable();
@@ -269,8 +310,8 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
struct kvm_memory_slot *memslot;
unsigned long gfn = gpa >> PAGE_SHIFT;
struct page *page;
- unsigned long offset;
- unsigned long pfn, pa;
+ unsigned long psize, offset;
+ unsigned long pa;
unsigned long *physp;
memslot = gfn_to_memslot(kvm, gfn);
@@ -279,20 +320,23 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
physp = kvm->arch.slot_phys[memslot->id];
if (!physp)
return NULL;
- physp += (gfn - memslot->base_gfn) >>
- (kvm->arch.ram_porder - PAGE_SHIFT);
+ physp += gfn - memslot->base_gfn;
pa = *physp;
if (!pa) {
- if (kvmppc_get_guest_page(kvm, gfn, memslot) < 0)
+ if (kvmppc_get_guest_page(kvm, gfn, memslot, PAGE_SIZE) < 0)
return NULL;
pa = *physp;
}
- pfn = pa >> PAGE_SHIFT;
- page = pfn_to_page(pfn);
+ page = pfn_to_page(pa >> PAGE_SHIFT);
+ psize = PAGE_SIZE;
+ if (PageHuge(page)) {
+ page = compound_head(page);
+ psize <<= compound_order(page);
+ }
get_page(page);
- offset = gpa & (kvm->arch.ram_psize - 1);
+ offset = gpa & (psize - 1);
if (nb_ret)
- *nb_ret = kvm->arch.ram_psize - offset;
+ *nb_ret = psize - offset;
return page_address(page) + offset;
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 5d3590c..150f527 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -49,8 +49,6 @@
#include <linux/highmem.h>
#include <linux/hugetlb.h>
-#define LARGE_PAGE_ORDER 24 /* 16MB pages */
-
/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */
@@ -1105,24 +1103,26 @@ long kvm_vm_ioctl_allocate_rma(struct kvm *kvm, struct kvm_allocate_rma *ret)
return fd;
}
+static unsigned long slb_pgsize_encoding(unsigned long psize)
+{
+ unsigned long senc = 0;
+
+ if (psize > 0x1000) {
+ senc = SLB_VSID_L;
+ if (psize == 0x10000)
+ senc |= SLB_VSID_LP_01;
+ }
+ return senc;
+}
+
int kvmppc_core_prepare_memory_region(struct kvm *kvm,
struct kvm_userspace_memory_region *mem)
{
- unsigned long psize;
unsigned long npages;
unsigned long *phys;
- /* For now, only allow 16MB-aligned slots */
- psize = kvm->arch.ram_psize;
- if ((mem->memory_size & (psize - 1)) ||
- (mem->guest_phys_addr & (psize - 1))) {
- pr_err("bad memory_size=%llx @ %llx\n",
- mem->memory_size, mem->guest_phys_addr);
- return -EINVAL;
- }
-
/* Allocate a slot_phys array */
- npages = mem->memory_size >> kvm->arch.ram_porder;
+ npages = mem->memory_size >> PAGE_SHIFT;
phys = kvm->arch.slot_phys[mem->slot];
if (!phys) {
phys = vzalloc(npages * sizeof(unsigned long));
@@ -1150,6 +1150,8 @@ static void unpin_slot(struct kvm *kvm, int slot_id)
continue;
pfn = physp[j] >> PAGE_SHIFT;
page = pfn_to_page(pfn);
+ if (PageHuge(page))
+ page = compound_head(page);
SetPageDirty(page);
put_page(page);
}
@@ -1172,12 +1174,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
unsigned long hva;
struct kvm_memory_slot *memslot;
struct vm_area_struct *vma;
- unsigned long lpcr;
+ unsigned long lpcr, senc;
unsigned long psize, porder;
unsigned long rma_size;
unsigned long rmls;
unsigned long *physp;
- unsigned long i, npages, pa;
+ unsigned long i, npages;
mutex_lock(&kvm->lock);
if (kvm->arch.rma_setup_done)
@@ -1199,8 +1201,7 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
goto up_out;
psize = vma_kernel_pagesize(vma);
- if (psize != kvm->arch.ram_psize)
- goto up_out;
+ porder = __ilog2(psize);
/* Is this one of our preallocated RMAs? */
if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
@@ -1217,13 +1218,20 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
goto out;
}
+ /* We can handle 4k, 64k or 16M pages in the VRMA */
+ err = -EINVAL;
+ if (!(psize == 0x1000 || psize == 0x10000 ||
+ psize == 0x1000000))
+ goto out;
+
/* Update VRMASD field in the LPCR */
- lpcr = kvm->arch.lpcr & ~(0x1fUL << LPCR_VRMASD_SH);
- lpcr |= LPCR_VRMA_L;
+ senc = slb_pgsize_encoding(psize);
+ lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
+ lpcr |= senc << (LPCR_VRMASD_SH - 4);
kvm->arch.lpcr = lpcr;
/* Create HPTEs in the hash page table for the VRMA */
- kvmppc_map_vrma(vcpu, memslot);
+ kvmppc_map_vrma(vcpu, memslot, porder);
} else {
/* Set up to use an RMO region */
@@ -1262,13 +1270,12 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
/* Initialize phys addrs of pages in RMO */
- porder = kvm->arch.ram_porder;
- npages = rma_size >> porder;
- pa = ri->base_pfn << PAGE_SHIFT;
+ npages = ri->npages;
+ porder = __ilog2(npages);
physp = kvm->arch.slot_phys[memslot->id];
spin_lock(&kvm->arch.slot_phys_lock);
for (i = 0; i < npages; ++i)
- physp[i] = pa + (i << porder);
+ physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) + porder;
spin_unlock(&kvm->arch.slot_phys_lock);
}
@@ -1297,8 +1304,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
- kvm->arch.ram_psize = 1ul << LARGE_PAGE_ORDER;
- kvm->arch.ram_porder = LARGE_PAGE_ORDER;
kvm->arch.rma = NULL;
kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index d2eb8ac..c76305c 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -80,6 +80,10 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
memslot = builtin_gfn_to_memslot(kvm, gfn);
if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)))
return H_PARAMETER;
+
+ /* Check if the requested page fits entirely in the memslot. */
+ if (!slot_is_aligned(memslot, psize))
+ return H_PARAMETER;
slot_fn = gfn - memslot->base_gfn;
physp = kvm->arch.slot_phys[memslot->id];
@@ -91,9 +95,9 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
pa = *physp;
if (!pa)
return H_TOO_HARD;
+ pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
pa &= PAGE_MASK;
- pte_size = kvm->arch.ram_psize;
if (pte_size < psize)
return H_PARAMETER;
if (pa && pte_size > psize)
--
1.7.5.4
next prev parent reply other threads:[~2011-12-06 6:09 UTC|newest]
Thread overview: 40+ messages / expand[flat|nested] mbox.gz Atom feed top
2011-12-06 6:01 [PATCH 0/13] KVM: PPC: Update Book3S HV memory handling Paul Mackerras
2011-12-06 6:01 ` Paul Mackerras
2011-12-06 6:01 ` Paul Mackerras
2011-12-06 6:02 ` [PATCH 01/13] KVM: PPC: Move kvm_vcpu_ioctl_[gs]et_one_reg down to Paul Mackerras
2011-12-06 6:02 ` [PATCH 01/13] KVM: PPC: Move kvm_vcpu_ioctl_[gs]et_one_reg down to platform-specific code Paul Mackerras
2011-12-06 6:02 ` Paul Mackerras
2011-12-06 6:03 ` [PATCH 02/13] KVM: PPC: Keep a record of HV guest view of hashed Paul Mackerras
2011-12-06 6:03 ` [PATCH 02/13] KVM: PPC: Keep a record of HV guest view of hashed page table entries Paul Mackerras
2011-12-06 6:03 ` Paul Mackerras
2011-12-06 6:04 ` [PATCH 03/13] KVM: PPC: Keep page physical addresses in per-slot Paul Mackerras
2011-12-06 6:04 ` [PATCH 03/13] KVM: PPC: Keep page physical addresses in per-slot arrays Paul Mackerras
2011-12-06 6:04 ` Paul Mackerras
2011-12-06 6:07 ` [PATCH 04/13] KVM: PPC: Add an interface for pinning guest pages in Paul Mackerras
2011-12-06 6:07 ` [PATCH 04/13] KVM: PPC: Add an interface for pinning guest pages in Book3s HV guests Paul Mackerras
2011-12-06 6:07 ` Paul Mackerras
2011-12-06 6:08 ` [PATCH 05/13] KVM: PPC: Make the H_ENTER hcall more reliable Paul Mackerras
2011-12-06 6:08 ` Paul Mackerras
2011-12-06 6:08 ` Paul Mackerras
2011-12-06 6:08 ` [PATCH 06/13] KVM: PPC: Only get pages when actually needed, not in Paul Mackerras
2011-12-06 6:08 ` [PATCH 06/13] KVM: PPC: Only get pages when actually needed, not in prepare_memory_region() Paul Mackerras
2011-12-06 6:08 ` Paul Mackerras
2011-12-06 6:09 ` Paul Mackerras [this message]
2011-12-06 6:09 ` [PATCH 07/13] KVM: PPC: Allow use of small pages to back Book3S HV guests Paul Mackerras
2011-12-06 6:09 ` [PATCH 08/13] KVM: PPC: Allow I/O mappings in memory slots Paul Mackerras
2011-12-06 6:09 ` Paul Mackerras
2011-12-06 6:09 ` Paul Mackerras
2011-12-06 6:10 ` [PATCH 09/13] KVM: PPC: Maintain a doubly-linked list of guest HPTEs Paul Mackerras
2011-12-06 6:10 ` [PATCH 09/13] KVM: PPC: Maintain a doubly-linked list of guest HPTEs for each gfn Paul Mackerras
2011-12-06 6:10 ` Paul Mackerras
2011-12-06 6:10 ` [PATCH 10/13] KVM: PPC: Implement MMIO emulation support for Book3S Paul Mackerras
2011-12-06 6:10 ` [PATCH 10/13] KVM: PPC: Implement MMIO emulation support for Book3S HV guests Paul Mackerras
2011-12-06 6:10 ` Paul Mackerras
2011-12-06 6:13 ` [PATCH 11/13] KVM: Add barriers to allow mmu_notifier_retry to be Paul Mackerras
2011-12-06 6:13 ` [PATCH 11/13] KVM: Add barriers to allow mmu_notifier_retry to be used locklessly Paul Mackerras
2011-12-06 6:13 ` Paul Mackerras
2011-12-06 6:14 ` [PATCH 12/13] KVM: PPC: Implement MMU notifiers for Book3S HV guests Paul Mackerras
2011-12-06 6:14 ` Paul Mackerras
2011-12-06 6:14 ` [PATCH 13/13] KVM: PPC: Allow for read-only pages backing a Book3S Paul Mackerras
2011-12-06 6:14 ` [PATCH 13/13] KVM: PPC: Allow for read-only pages backing a Book3S HV guest Paul Mackerras
2011-12-06 6:14 ` Paul Mackerras
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20111206060908.GK12389@drongo \
--to=paulus@samba.org \
--cc=agraf@suse.de \
--cc=kvm-ppc@vger.kernel.org \
--cc=kvm@vger.kernel.org \
--cc=linuxppc-dev@ozlabs.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.