From: Alexander Graf <agraf@suse.de>
To: kvm-ppc@vger.kernel.org
Cc: kvm list <kvm@vger.kernel.org>, Avi Kivity <avi@redhat.com>,
Marcelo Tosatti <mtosatti@redhat.com>,
Paul Mackerras <paulus@samba.org>
Subject: [PATCH 39/52] KVM: PPC: Allow for read-only pages backing a Book3S HV guest
Date: Fri, 13 Jan 2012 15:31:42 +0100 [thread overview]
Message-ID: <1326465115-5976-40-git-send-email-agraf@suse.de> (raw)
In-Reply-To: <1326465115-5976-1-git-send-email-agraf@suse.de>
From: Paul Mackerras <paulus@samba.org>
With this, if a guest does an H_ENTER with a read/write HPTE on a page
which is currently read-only, we make the actual HPTE inserted be a
read-only version of the HPTE. We now intercept protection faults as
well as HPTE not found faults, and for a protection fault we work out
whether it should be reflected to the guest (e.g. because the guest HPTE
didn't allow write access to usermode) or handled by switching to
kernel context and calling kvmppc_book3s_hv_page_fault, which will then
request write access to the page and update the actual HPTE.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/kvm_book3s_64.h | 20 +++++++++++++-
arch/powerpc/kvm/book3s_64_mmu_hv.c | 39 +++++++++++++++++++++++++++--
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 32 +++++++++++++++++-------
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 +-
4 files changed, 78 insertions(+), 17 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index c21e46d..b0c08b1 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -121,6 +121,22 @@ static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
}
+static inline int hpte_is_writable(unsigned long ptel)
+{
+ unsigned long pp = ptel & (HPTE_R_PP0 | HPTE_R_PP);
+
+ return pp != PP_RXRX && pp != PP_RXXX;
+}
+
+static inline unsigned long hpte_make_readonly(unsigned long ptel)
+{
+ if ((ptel & HPTE_R_PP0) || (ptel & HPTE_R_PP) == PP_RWXX)
+ ptel = (ptel & ~HPTE_R_PP) | PP_RXXX;
+ else
+ ptel |= PP_RXRX;
+ return ptel;
+}
+
static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
{
unsigned int wimg = ptel & HPTE_R_WIMG;
@@ -140,7 +156,7 @@ static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
* Lock and read a linux PTE. If it's present and writable, atomically
* set dirty and referenced bits and return the PTE, otherwise return 0.
*/
-static inline pte_t kvmppc_read_update_linux_pte(pte_t *p)
+static inline pte_t kvmppc_read_update_linux_pte(pte_t *p, int writing)
{
pte_t pte, tmp;
@@ -158,7 +174,7 @@ static inline pte_t kvmppc_read_update_linux_pte(pte_t *p)
if (pte_present(pte)) {
pte = pte_mkyoung(pte);
- if (pte_write(pte))
+ if (writing && pte_write(pte))
pte = pte_mkdirty(pte);
}
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 83761dd..66d6452 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -503,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
struct page *page, *pages[1];
long index, ret, npages;
unsigned long is_io;
+ unsigned int writing, write_ok;
struct vm_area_struct *vma;
/*
@@ -553,8 +554,11 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
pfn = 0;
page = NULL;
pte_size = PAGE_SIZE;
+ writing = (dsisr & DSISR_ISSTORE) != 0;
+ /* If writing != 0, then the HPTE must allow writing, if we get here */
+ write_ok = writing;
hva = gfn_to_hva_memslot(memslot, gfn);
- npages = get_user_pages_fast(hva, 1, 1, pages);
+ npages = get_user_pages_fast(hva, 1, writing, pages);
if (npages < 1) {
/* Check if it's an I/O mapping */
down_read(¤t->mm->mmap_sem);
@@ -565,6 +569,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
((hva - vma->vm_start) >> PAGE_SHIFT);
pte_size = psize;
is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
+ write_ok = vma->vm_flags & VM_WRITE;
}
up_read(¤t->mm->mmap_sem);
if (!pfn)
@@ -575,6 +580,24 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
page = compound_head(page);
pte_size <<= compound_order(page);
}
+ /* if the guest wants write access, see if that is OK */
+ if (!writing && hpte_is_writable(r)) {
+ pte_t *ptep, pte;
+
+ /*
+ * We need to protect against page table destruction
+ * while looking up and updating the pte.
+ */
+ rcu_read_lock_sched();
+ ptep = find_linux_pte_or_hugepte(current->mm->pgd,
+ hva, NULL);
+ if (ptep && pte_present(*ptep)) {
+ pte = kvmppc_read_update_linux_pte(ptep, 1);
+ if (pte_write(pte))
+ write_ok = 1;
+ }
+ rcu_read_unlock_sched();
+ }
pfn = page_to_pfn(page);
}
@@ -595,6 +618,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Set the HPTE to point to pfn */
r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT);
+ if (hpte_is_writable(r) && !write_ok)
+ r = hpte_make_readonly(r);
ret = RESUME_GUEST;
preempt_disable();
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
@@ -614,14 +639,22 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unlock_rmap(rmap);
goto out_unlock;
}
- kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+
+ if (hptep[0] & HPTE_V_VALID) {
+ /* HPTE was previously valid, so we need to invalidate it */
+ unlock_rmap(rmap);
+ hptep[0] |= HPTE_V_ABSENT;
+ kvmppc_invalidate_hpte(kvm, hptep, index);
+ } else {
+ kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
+ }
hptep[1] = r;
eieio();
hptep[0] = hpte[0];
asm volatile("ptesync" : : : "memory");
preempt_enable();
- if (page)
+ if (page && hpte_is_writable(r))
SetPageDirty(page);
out_put:
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 81d16ed..d3e36fc 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -120,7 +120,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
}
static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
- unsigned long *pte_sizep)
+ int writing, unsigned long *pte_sizep)
{
pte_t *ptep;
unsigned long ps = *pte_sizep;
@@ -137,7 +137,7 @@ static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
return __pte(0);
if (!pte_present(*ptep))
return __pte(0);
- return kvmppc_read_update_linux_pte(ptep);
+ return kvmppc_read_update_linux_pte(ptep, writing);
}
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
@@ -154,12 +154,14 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long is_io;
unsigned long *rmap;
pte_t pte;
+ unsigned int writing;
unsigned long mmu_seq;
bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
psize = hpte_page_size(pteh, ptel);
if (!psize)
return H_PARAMETER;
+ writing = hpte_is_writable(ptel);
pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
/* used later to detect if we might have been invalidated */
@@ -208,8 +210,11 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
/* Look up the Linux PTE for the backing page */
pte_size = psize;
- pte = lookup_linux_pte(vcpu, hva, &pte_size);
+ pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
if (pte_present(pte)) {
+ if (writing && !pte_write(pte))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
is_io = hpte_cache_bits(pte_val(pte));
pa = pte_pfn(pte) << PAGE_SHIFT;
}
@@ -678,7 +683,9 @@ EXPORT_SYMBOL(kvmppc_hv_find_lock_hpte);
/*
* Called in real mode to check whether an HPTE not found fault
- * is due to accessing a paged-out page or an emulated MMIO page.
+ * is due to accessing a paged-out page or an emulated MMIO page,
+ * or if a protection fault is due to accessing a page that the
+ * guest wanted read/write access to but which we made read-only.
* Returns a possibly modified status (DSISR) value if not
* (i.e. pass the interrupt to the guest),
* -1 to pass the fault up to host kernel mode code, -2 to do that
@@ -696,12 +703,17 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
struct revmap_entry *rev;
unsigned long pp, key;
- valid = HPTE_V_VALID | HPTE_V_ABSENT;
+ /* For protection fault, expect to find a valid HPTE */
+ valid = HPTE_V_VALID;
+ if (status & DSISR_NOHPTE)
+ valid |= HPTE_V_ABSENT;
index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
- if (index < 0)
- return status; /* there really was no HPTE */
-
+ if (index < 0) {
+ if (status & DSISR_NOHPTE)
+ return status; /* there really was no HPTE */
+ return 0; /* for prot fault, HPTE disappeared */
+ }
hpte = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
v = hpte[0] & ~HPTE_V_HVLOCK;
r = hpte[1];
@@ -712,8 +724,8 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
asm volatile("lwsync" : : : "memory");
hpte[0] = v;
- /* If the HPTE is valid by now, retry the instruction */
- if (v & HPTE_V_VALID)
+ /* For not found, if the HPTE is valid by now, retry the instruction */
+ if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
return 0;
/* Check access permissions to the page */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 7d49906..b70bf22 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1114,8 +1114,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
kvmppc_hdsi:
mfspr r4, SPRN_HDAR
mfspr r6, SPRN_HDSISR
- /* HPTE not found fault? */
- andis. r0, r6, DSISR_NOHPTE@h
+ /* HPTE not found fault or protection fault? */
+ andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
beq 1f /* if not, send it to the guest */
andi. r0, r11, MSR_DR /* data relocation enabled? */
beq 3f
--
1.6.0.2
next prev parent reply other threads:[~2012-01-13 14:31 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-01-13 14:31 [PULL 00/52] ppc patch queue 2012-01-13 Alexander Graf
2012-01-13 14:31 ` [PATCH 01/52] KVM: PPC: e500: don't translate gfn to pfn with preemption disabled Alexander Graf
2012-01-13 14:31 ` [PATCH 02/52] KVM: PPC: e500: Eliminate preempt_disable in local_sid_destroy_all Alexander Graf
2012-01-13 14:31 ` [PATCH 03/52] KVM: PPC: e500: clear up confusion between host and guest entries Alexander Graf
2012-01-13 14:31 ` [PATCH 04/52] KVM: PPC: e500: MMU API Alexander Graf
2012-01-13 14:31 ` [PATCH 05/52] KVM: PPC: e500: tlbsx: fix tlb0 esel Alexander Graf
2012-01-13 14:31 ` [PATCH 06/52] KVM: PPC: e500: Don't hardcode PIR=0 Alexander Graf
2012-01-13 14:31 ` [PATCH 07/52] KVM: PPC: E500: Support hugetlbfs Alexander Graf
2012-01-13 14:31 ` [PATCH 08/52] PPC: Fix race in mtmsr paravirt implementation Alexander Graf
2012-01-13 14:31 ` [PATCH 09/52] Fix DEC truncation for greater than 0xffff_ffff/1000 Alexander Graf
2012-01-13 14:31 ` [PATCH 10/52] KVM: booke: Do Not start decrementer when SPRN_DEC set 0 Alexander Graf
2012-01-13 14:31 ` [PATCH 11/52] KVM: PPC: booke: check for signals in kvmppc_vcpu_run Alexander Graf
2012-01-13 14:31 ` [PATCH 12/52] KVM: PPC: Rename deliver_interrupts to prepare_to_enter Alexander Graf
2012-01-13 14:31 ` [PATCH 13/52] KVM: PPC: Move prepare_to_enter call site into subarch code Alexander Graf
2012-01-13 14:31 ` [PATCH 14/52] KVM: PPC: booke: Check for MSR[WE] in prepare_to_enter Alexander Graf
2012-01-13 14:31 ` [PATCH 15/52] KVM: PPC: booke: Fix int_pending calculation for MSR[EE] paravirt Alexander Graf
2012-01-13 14:31 ` [PATCH 16/52] KVM: PPC: booke: Paravirtualize wrtee Alexander Graf
2012-01-13 14:31 ` [PATCH 17/52] KVM: PPC: Paravirtualize SPRG4-7, ESR, PIR, MASn Alexander Graf
2012-01-13 14:31 ` [PATCH 18/52] KVM: PPC: booke: Improve timer register emulation Alexander Graf
2012-01-13 14:31 ` [PATCH 19/52] KVM: PPC: Book3s: PR: Disable preemption in vcpu_run Alexander Graf
2012-01-13 14:31 ` [PATCH 20/52] KVM: PPC: Book3s: PR: No irq_disable " Alexander Graf
2012-01-13 14:31 ` [PATCH 21/52] KVM: PPC: Use get/set for to_svcpu to help preemption Alexander Graf
2012-01-13 14:31 ` [PATCH 22/52] KVM: PPC: align vcpu_kick with x86 Alexander Graf
2012-01-13 14:31 ` [PATCH 23/52] KVM: PPC: Book3S: PR: Fix signal check race Alexander Graf
2012-01-13 14:31 ` [PATCH 24/52] KVM: PPC: e500: Fix TLBnCFG in KVM_CONFIG_TLB Alexander Graf
2012-01-13 14:31 ` [PATCH 25/52] KVM: PPC: e500: use hardware hint when loading TLB0 entries Alexander Graf
2012-01-13 14:31 ` [PATCH 26/52] KVM: PPC: Avoid patching paravirt template code Alexander Graf
2012-01-13 14:31 ` [PATCH 27/52] KVM: PPC: Make wakeups work again for Book3S HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 28/52] KVM: PPC: Keep a record of HV guest view of hashed page table entries Alexander Graf
2012-01-13 14:31 ` [PATCH 29/52] KVM: PPC: Keep page physical addresses in per-slot arrays Alexander Graf
2012-01-13 14:31 ` [PATCH 30/52] KVM: PPC: Add an interface for pinning guest pages in Book3s HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 31/52] KVM: PPC: Make the H_ENTER hcall more reliable Alexander Graf
2012-01-13 14:31 ` [PATCH 32/52] KVM: PPC: Only get pages when actually needed, not in prepare_memory_region() Alexander Graf
2012-01-13 14:31 ` [PATCH 33/52] KVM: PPC: Allow use of small pages to back Book3S HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 34/52] KVM: PPC: Allow I/O mappings in memory slots Alexander Graf
2012-01-13 14:31 ` [PATCH 35/52] KVM: PPC: Maintain a doubly-linked list of guest HPTEs for each gfn Alexander Graf
2012-01-13 14:31 ` [PATCH 36/52] KVM: PPC: Implement MMIO emulation support for Book3S HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 37/52] KVM: PPC: Implement MMU notifiers " Alexander Graf
2012-01-13 14:31 ` [PATCH 38/52] KVM: Add barriers to allow mmu_notifier_retry to be used locklessly Alexander Graf
2012-01-13 14:31 ` Alexander Graf [this message]
2012-01-13 14:31 ` [PATCH 40/52] KVM: PPC: Fix vcpu_create dereference before validity check Alexander Graf
2012-01-13 14:31 ` [PATCH 41/52] KVM: PPC: Add KVM_CAP_NR_VCPUS and KVM_CAP_MAX_VCPUS Alexander Graf
2012-01-13 14:31 ` [PATCH 42/52] KVM: PPC: Book3S HV: Keep HPTE locked when invalidating Alexander Graf
2012-01-13 14:31 ` [PATCH 43/52] KVM: PPC: Book3s HV: Maintain separate guest and host views of R and C bits Alexander Graf
2012-01-13 14:31 ` [PATCH 44/52] KVM: PPC: Book3S HV: Use the hardware referenced bit for kvm_age_hva Alexander Graf
2012-01-13 14:31 ` [PATCH 45/52] KVM: PPC: Book3s HV: Implement get_dirty_log using hardware changed bit Alexander Graf
2012-01-13 14:31 ` [PATCH 46/52] KVM: PPC: booke: Add booke206 TLB trace Alexander Graf
2012-01-13 14:31 ` [PATCH 47/52] KVM: PPC: Use the vcpu kmem_cache when allocating new VCPUs Alexander Graf
2012-01-13 14:31 ` [PATCH 48/52] KVM: PPC: Add generic single register ioctls Alexander Graf
2012-01-13 14:31 ` [PATCH 49/52] KVM: PPC: Add support for explicit HIOR setting Alexander Graf
2012-01-13 14:31 ` [PATCH 50/52] KVM: PPC: Move kvm_vcpu_ioctl_[gs]et_one_reg down to platform-specific code Alexander Graf
2012-01-13 14:31 ` [PATCH 51/52] KVM: PPC: Rename MMIO register identifiers Alexander Graf
2012-01-13 14:31 ` [PATCH 52/52] KVM: PPC: refer to paravirt docs in header file Alexander Graf
2012-01-16 10:55 ` [PULL 00/52] ppc patch queue 2012-01-13 Marcelo Tosatti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1326465115-5976-40-git-send-email-agraf@suse.de \
--to=agraf@suse.de \
--cc=avi@redhat.com \
--cc=kvm-ppc@vger.kernel.org \
--cc=kvm@vger.kernel.org \
--cc=mtosatti@redhat.com \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox