public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Alexander Graf <agraf@suse.de>
To: kvm-ppc@vger.kernel.org
Cc: kvm list <kvm@vger.kernel.org>, Avi Kivity <avi@redhat.com>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	Paul Mackerras <paulus@samba.org>
Subject: [PATCH 34/52] KVM: PPC: Allow I/O mappings in memory slots
Date: Fri, 13 Jan 2012 15:31:37 +0100	[thread overview]
Message-ID: <1326465115-5976-35-git-send-email-agraf@suse.de> (raw)
In-Reply-To: <1326465115-5976-1-git-send-email-agraf@suse.de>

From: Paul Mackerras <paulus@samba.org>

This provides for the case where userspace maps an I/O device into the
address range of a memory slot using a VM_PFNMAP mapping.  In that
case, we work out the pfn from vma->vm_pgoff, and record the cache
enable bits from vma->vm_page_prot in two low-order bits in the
slot_phys array entries.  Then, in kvmppc_h_enter() we check that the
cache bits in the HPTE that the guest wants to insert match the cache
bits in the slot_phys array entry.  However, we do allow the guest to
create what it thinks is a non-cacheable or write-through mapping to
memory that is actually cacheable, so that we can use normal system
memory as part of an emulated device later on.  In that case the actual
HPTE we insert is a cacheable HPTE.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   26 +++++++++++
 arch/powerpc/include/asm/kvm_host.h      |    2 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c      |   67 ++++++++++++++++++++----------
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      |   15 +++++-
 4 files changed, 85 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 10920f7..18b590d 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -113,6 +113,32 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
 	return 0;				/* error */
 }
 
+static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
+{
+	unsigned int wimg = ptel & HPTE_R_WIMG;
+
+	/* Handle SAO */
+	if (wimg == (HPTE_R_W | HPTE_R_I | HPTE_R_M) &&
+	    cpu_has_feature(CPU_FTR_ARCH_206))
+		wimg = HPTE_R_M;
+
+	if (!io_type)
+		return wimg == HPTE_R_M;
+
+	return (wimg & (HPTE_R_W | HPTE_R_I)) == io_type;
+}
+
+/* Return HPTE cache control bits corresponding to Linux pte bits */
+static inline unsigned long hpte_cache_bits(unsigned long pte_val)
+{
+#if _PAGE_NO_CACHE == HPTE_R_I && _PAGE_WRITETHRU == HPTE_R_W
+	return pte_val & (HPTE_R_W | HPTE_R_I);
+#else
+	return ((pte_val & _PAGE_NO_CACHE) ? HPTE_R_I : 0) +
+		((pte_val & _PAGE_WRITETHRU) ? HPTE_R_W : 0);
+#endif
+}
+
 static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
 				   unsigned long pagesize)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 9252d5e..243bc80 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -178,6 +178,8 @@ struct revmap_entry {
 
 /* Low-order bits in kvm->arch.slot_phys[][] */
 #define KVMPPC_PAGE_ORDER_MASK	0x1f
+#define KVMPPC_PAGE_NO_CACHE	HPTE_R_I	/* 0x20 */
+#define KVMPPC_PAGE_WRITETHRU	HPTE_R_W	/* 0x40 */
 #define KVMPPC_GOT_PAGE		0x80
 
 struct kvm_arch {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index cc18f3d..b904c40 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -199,7 +199,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 	struct page *page, *hpage, *pages[1];
 	unsigned long s, pgsize;
 	unsigned long *physp;
-	unsigned int got, pgorder;
+	unsigned int is_io, got, pgorder;
+	struct vm_area_struct *vma;
 	unsigned long pfn, i, npages;
 
 	physp = kvm->arch.slot_phys[memslot->id];
@@ -208,34 +209,51 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 	if (physp[gfn - memslot->base_gfn])
 		return 0;
 
+	is_io = 0;
+	got = 0;
 	page = NULL;
 	pgsize = psize;
+	err = -EINVAL;
 	start = gfn_to_hva_memslot(memslot, gfn);
 
 	/* Instantiate and get the page we want access to */
 	np = get_user_pages_fast(start, 1, 1, pages);
-	if (np != 1)
-		return -EINVAL;
-	page = pages[0];
-	got = KVMPPC_GOT_PAGE;
+	if (np != 1) {
+		/* Look up the vma for the page */
+		down_read(&current->mm->mmap_sem);
+		vma = find_vma(current->mm, start);
+		if (!vma || vma->vm_start > start ||
+		    start + psize > vma->vm_end ||
+		    !(vma->vm_flags & VM_PFNMAP))
+			goto up_err;
+		is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+		pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
+		/* check alignment of pfn vs. requested page size */
+		if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
+			goto up_err;
+		up_read(&current->mm->mmap_sem);
 
-	/* See if this is a large page */
-	s = PAGE_SIZE;
-	if (PageHuge(page)) {
-		hpage = compound_head(page);
-		s <<= compound_order(hpage);
-		/* Get the whole large page if slot alignment is ok */
-		if (s > psize && slot_is_aligned(memslot, s) &&
-		    !(memslot->userspace_addr & (s - 1))) {
-			start &= ~(s - 1);
-			pgsize = s;
-			page = hpage;
+	} else {
+		page = pages[0];
+		got = KVMPPC_GOT_PAGE;
+
+		/* See if this is a large page */
+		s = PAGE_SIZE;
+		if (PageHuge(page)) {
+			hpage = compound_head(page);
+			s <<= compound_order(hpage);
+			/* Get the whole large page if slot alignment is ok */
+			if (s > psize && slot_is_aligned(memslot, s) &&
+			    !(memslot->userspace_addr & (s - 1))) {
+				start &= ~(s - 1);
+				pgsize = s;
+				page = hpage;
+			}
 		}
+		if (s < psize)
+			goto out;
+		pfn = page_to_pfn(page);
 	}
-	err = -EINVAL;
-	if (s < psize)
-		goto out;
-	pfn = page_to_pfn(page);
 
 	npages = pgsize >> PAGE_SHIFT;
 	pgorder = __ilog2(npages);
@@ -243,7 +261,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 	spin_lock(&kvm->arch.slot_phys_lock);
 	for (i = 0; i < npages; ++i) {
 		if (!physp[i]) {
-			physp[i] = ((pfn + i) << PAGE_SHIFT) + got + pgorder;
+			physp[i] = ((pfn + i) << PAGE_SHIFT) +
+				got + is_io + pgorder;
 			got = 0;
 		}
 	}
@@ -257,6 +276,10 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 		put_page(page);
 	}
 	return err;
+
+ up_err:
+	up_read(&current->mm->mmap_sem);
+	return err;
 }
 
 /*
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index c086eb0..3f5b016 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -65,6 +65,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	unsigned long g_ptel = ptel;
 	struct kvm_memory_slot *memslot;
 	unsigned long *physp, pte_size;
+	unsigned long is_io;
 	bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
 
 	psize = hpte_page_size(pteh, ptel);
@@ -92,6 +93,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	pa = *physp;
 	if (!pa)
 		return H_TOO_HARD;
+	is_io = pa & (HPTE_R_I | HPTE_R_W);
 	pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
 	pa &= PAGE_MASK;
 
@@ -104,9 +106,16 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	ptel |= pa;
 
 	/* Check WIMG */
-	if ((ptel & HPTE_R_WIMG) != HPTE_R_M &&
-	    (ptel & HPTE_R_WIMG) != (HPTE_R_W | HPTE_R_I | HPTE_R_M))
-		return H_PARAMETER;
+	if (!hpte_cache_flags_ok(ptel, is_io)) {
+		if (is_io)
+			return H_PARAMETER;
+		/*
+		 * Allow guest to map emulated device memory as
+		 * uncacheable, but actually make it cacheable.
+		 */
+		ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
+		ptel |= HPTE_R_M;
+	}
 	pteh &= ~0x60UL;
 	pteh |= HPTE_V_VALID;
 
-- 
1.6.0.2


  parent reply	other threads:[~2012-01-13 14:58 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-13 14:31 [PULL 00/52] ppc patch queue 2012-01-13 Alexander Graf
2012-01-13 14:31 ` [PATCH 01/52] KVM: PPC: e500: don't translate gfn to pfn with preemption disabled Alexander Graf
2012-01-13 14:31 ` [PATCH 02/52] KVM: PPC: e500: Eliminate preempt_disable in local_sid_destroy_all Alexander Graf
2012-01-13 14:31 ` [PATCH 03/52] KVM: PPC: e500: clear up confusion between host and guest entries Alexander Graf
2012-01-13 14:31 ` [PATCH 04/52] KVM: PPC: e500: MMU API Alexander Graf
2012-01-13 14:31 ` [PATCH 05/52] KVM: PPC: e500: tlbsx: fix tlb0 esel Alexander Graf
2012-01-13 14:31 ` [PATCH 06/52] KVM: PPC: e500: Don't hardcode PIR=0 Alexander Graf
2012-01-13 14:31 ` [PATCH 07/52] KVM: PPC: E500: Support hugetlbfs Alexander Graf
2012-01-13 14:31 ` [PATCH 08/52] PPC: Fix race in mtmsr paravirt implementation Alexander Graf
2012-01-13 14:31 ` [PATCH 09/52] Fix DEC truncation for greater than 0xffff_ffff/1000 Alexander Graf
2012-01-13 14:31 ` [PATCH 10/52] KVM: booke: Do Not start decrementer when SPRN_DEC set 0 Alexander Graf
2012-01-13 14:31 ` [PATCH 11/52] KVM: PPC: booke: check for signals in kvmppc_vcpu_run Alexander Graf
2012-01-13 14:31 ` [PATCH 12/52] KVM: PPC: Rename deliver_interrupts to prepare_to_enter Alexander Graf
2012-01-13 14:31 ` [PATCH 13/52] KVM: PPC: Move prepare_to_enter call site into subarch code Alexander Graf
2012-01-13 14:31 ` [PATCH 14/52] KVM: PPC: booke: Check for MSR[WE] in prepare_to_enter Alexander Graf
2012-01-13 14:31 ` [PATCH 15/52] KVM: PPC: booke: Fix int_pending calculation for MSR[EE] paravirt Alexander Graf
2012-01-13 14:31 ` [PATCH 16/52] KVM: PPC: booke: Paravirtualize wrtee Alexander Graf
2012-01-13 14:31 ` [PATCH 17/52] KVM: PPC: Paravirtualize SPRG4-7, ESR, PIR, MASn Alexander Graf
2012-01-13 14:31 ` [PATCH 18/52] KVM: PPC: booke: Improve timer register emulation Alexander Graf
2012-01-13 14:31 ` [PATCH 19/52] KVM: PPC: Book3s: PR: Disable preemption in vcpu_run Alexander Graf
2012-01-13 14:31 ` [PATCH 20/52] KVM: PPC: Book3s: PR: No irq_disable " Alexander Graf
2012-01-13 14:31 ` [PATCH 21/52] KVM: PPC: Use get/set for to_svcpu to help preemption Alexander Graf
2012-01-13 14:31 ` [PATCH 22/52] KVM: PPC: align vcpu_kick with x86 Alexander Graf
2012-01-13 14:31 ` [PATCH 23/52] KVM: PPC: Book3S: PR: Fix signal check race Alexander Graf
2012-01-13 14:31 ` [PATCH 24/52] KVM: PPC: e500: Fix TLBnCFG in KVM_CONFIG_TLB Alexander Graf
2012-01-13 14:31 ` [PATCH 25/52] KVM: PPC: e500: use hardware hint when loading TLB0 entries Alexander Graf
2012-01-13 14:31 ` [PATCH 26/52] KVM: PPC: Avoid patching paravirt template code Alexander Graf
2012-01-13 14:31 ` [PATCH 27/52] KVM: PPC: Make wakeups work again for Book3S HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 28/52] KVM: PPC: Keep a record of HV guest view of hashed page table entries Alexander Graf
2012-01-13 14:31 ` [PATCH 29/52] KVM: PPC: Keep page physical addresses in per-slot arrays Alexander Graf
2012-01-13 14:31 ` [PATCH 30/52] KVM: PPC: Add an interface for pinning guest pages in Book3s HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 31/52] KVM: PPC: Make the H_ENTER hcall more reliable Alexander Graf
2012-01-13 14:31 ` [PATCH 32/52] KVM: PPC: Only get pages when actually needed, not in prepare_memory_region() Alexander Graf
2012-01-13 14:31 ` [PATCH 33/52] KVM: PPC: Allow use of small pages to back Book3S HV guests Alexander Graf
2012-01-13 14:31 ` Alexander Graf [this message]
2012-01-13 14:31 ` [PATCH 35/52] KVM: PPC: Maintain a doubly-linked list of guest HPTEs for each gfn Alexander Graf
2012-01-13 14:31 ` [PATCH 36/52] KVM: PPC: Implement MMIO emulation support for Book3S HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 37/52] KVM: PPC: Implement MMU notifiers " Alexander Graf
2012-01-13 14:31 ` [PATCH 38/52] KVM: Add barriers to allow mmu_notifier_retry to be used locklessly Alexander Graf
2012-01-13 14:31 ` [PATCH 39/52] KVM: PPC: Allow for read-only pages backing a Book3S HV guest Alexander Graf
2012-01-13 14:31 ` [PATCH 40/52] KVM: PPC: Fix vcpu_create dereference before validity check Alexander Graf
2012-01-13 14:31 ` [PATCH 41/52] KVM: PPC: Add KVM_CAP_NR_VCPUS and KVM_CAP_MAX_VCPUS Alexander Graf
2012-01-13 14:31 ` [PATCH 42/52] KVM: PPC: Book3S HV: Keep HPTE locked when invalidating Alexander Graf
2012-01-13 14:31 ` [PATCH 43/52] KVM: PPC: Book3s HV: Maintain separate guest and host views of R and C bits Alexander Graf
2012-01-13 14:31 ` [PATCH 44/52] KVM: PPC: Book3S HV: Use the hardware referenced bit for kvm_age_hva Alexander Graf
2012-01-13 14:31 ` [PATCH 45/52] KVM: PPC: Book3s HV: Implement get_dirty_log using hardware changed bit Alexander Graf
2012-01-13 14:31 ` [PATCH 46/52] KVM: PPC: booke: Add booke206 TLB trace Alexander Graf
2012-01-13 14:31 ` [PATCH 47/52] KVM: PPC: Use the vcpu kmem_cache when allocating new VCPUs Alexander Graf
2012-01-13 14:31 ` [PATCH 48/52] KVM: PPC: Add generic single register ioctls Alexander Graf
2012-01-13 14:31 ` [PATCH 49/52] KVM: PPC: Add support for explicit HIOR setting Alexander Graf
2012-01-13 14:31 ` [PATCH 50/52] KVM: PPC: Move kvm_vcpu_ioctl_[gs]et_one_reg down to platform-specific code Alexander Graf
2012-01-13 14:31 ` [PATCH 51/52] KVM: PPC: Rename MMIO register identifiers Alexander Graf
2012-01-13 14:31 ` [PATCH 52/52] KVM: PPC: refer to paravirt docs in header file Alexander Graf
2012-01-16 10:55 ` [PULL 00/52] ppc patch queue 2012-01-13 Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1326465115-5976-35-git-send-email-agraf@suse.de \
    --to=agraf@suse.de \
    --cc=avi@redhat.com \
    --cc=kvm-ppc@vger.kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox