public inbox for kvm@vger.kernel.org
 help / color / mirror / Atom feed
From: Alexander Graf <agraf@suse.de>
To: kvm-ppc@vger.kernel.org
Cc: kvm list <kvm@vger.kernel.org>, Avi Kivity <avi@redhat.com>,
	Marcelo Tosatti <mtosatti@redhat.com>,
	Paul Mackerras <paulus@samba.org>
Subject: [PATCH 35/52] KVM: PPC: Maintain a doubly-linked list of guest HPTEs for each gfn
Date: Fri, 13 Jan 2012 15:31:38 +0100	[thread overview]
Message-ID: <1326465115-5976-36-git-send-email-agraf@suse.de> (raw)
In-Reply-To: <1326465115-5976-1-git-send-email-agraf@suse.de>

From: Paul Mackerras <paulus@samba.org>

This expands the reverse mapping array to contain two links for each
HPTE which are used to link together HPTEs that correspond to the
same guest logical page.  Each circular list of HPTEs is pointed to
by the rmap array entry for the guest logical page, pointed to by
the relevant memslot.  Links are 32-bit HPT entry indexes rather than
full 64-bit pointers, to save space.  We use 3 of the remaining 32
bits in the rmap array entries as a lock bit, a referenced bit and
a present bit (the present bit is needed since HPTE index 0 is valid).
The bit lock for the rmap chain nests inside the HPTE lock bit.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 arch/powerpc/include/asm/kvm_book3s_64.h |   18 ++++++
 arch/powerpc/include/asm/kvm_host.h      |   17 ++++++-
 arch/powerpc/kvm/book3s_hv_rm_mmu.c      |   84 +++++++++++++++++++++++++++++-
 3 files changed, 117 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 18b590d..9508c03 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -113,6 +113,11 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l)
 	return 0;				/* error */
 }
 
+static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize)
+{
+	return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
+}
+
 static inline int hpte_cache_flags_ok(unsigned long ptel, unsigned long io_type)
 {
 	unsigned int wimg = ptel & HPTE_R_WIMG;
@@ -139,6 +144,19 @@ static inline unsigned long hpte_cache_bits(unsigned long pte_val)
 #endif
 }
 
+static inline void lock_rmap(unsigned long *rmap)
+{
+	do {
+		while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
+			cpu_relax();
+	} while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap));
+}
+
+static inline void unlock_rmap(unsigned long *rmap)
+{
+	__clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap);
+}
+
 static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
 				   unsigned long pagesize)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 243bc80..97cb2d7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -170,12 +170,27 @@ struct kvmppc_rma_info {
 /*
  * The reverse mapping array has one entry for each HPTE,
  * which stores the guest's view of the second word of the HPTE
- * (including the guest physical address of the mapping).
+ * (including the guest physical address of the mapping),
+ * plus forward and backward pointers in a doubly-linked ring
+ * of HPTEs that map the same host page.  The pointers in this
+ * ring are 32-bit HPTE indexes, to save space.
  */
 struct revmap_entry {
 	unsigned long guest_rpte;
+	unsigned int forw, back;
 };
 
+/*
+ * We use the top bit of each memslot->rmap entry as a lock bit,
+ * and bit 32 as a present flag.  The bottom 32 bits are the
+ * index in the guest HPT of a HPTE that points to the page.
+ */
+#define KVMPPC_RMAP_LOCK_BIT	63
+#define KVMPPC_RMAP_REF_BIT	33
+#define KVMPPC_RMAP_REFERENCED	(1ul << KVMPPC_RMAP_REF_BIT)
+#define KVMPPC_RMAP_PRESENT	0x100000000ul
+#define KVMPPC_RMAP_INDEX	0xfffffffful
+
 /* Low-order bits in kvm->arch.slot_phys[][] */
 #define KVMPPC_PAGE_ORDER_MASK	0x1f
 #define KVMPPC_PAGE_NO_CACHE	HPTE_R_I	/* 0x20 */
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 3f5b016..5b31caa 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -54,6 +54,70 @@ static void *real_vmalloc_addr(void *x)
 	return __va(addr);
 }
 
+/*
+ * Add this HPTE into the chain for the real page.
+ * Must be called with the chain locked; it unlocks the chain.
+ */
+static void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
+			     unsigned long *rmap, long pte_index, int realmode)
+{
+	struct revmap_entry *head, *tail;
+	unsigned long i;
+
+	if (*rmap & KVMPPC_RMAP_PRESENT) {
+		i = *rmap & KVMPPC_RMAP_INDEX;
+		head = &kvm->arch.revmap[i];
+		if (realmode)
+			head = real_vmalloc_addr(head);
+		tail = &kvm->arch.revmap[head->back];
+		if (realmode)
+			tail = real_vmalloc_addr(tail);
+		rev->forw = i;
+		rev->back = head->back;
+		tail->forw = pte_index;
+		head->back = pte_index;
+	} else {
+		rev->forw = rev->back = pte_index;
+		i = pte_index;
+	}
+	smp_wmb();
+	*rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
+}
+
+/* Remove this HPTE from the chain for a real page */
+static void remove_revmap_chain(struct kvm *kvm, long pte_index,
+				unsigned long hpte_v)
+{
+	struct revmap_entry *rev, *next, *prev;
+	unsigned long gfn, ptel, head;
+	struct kvm_memory_slot *memslot;
+	unsigned long *rmap;
+
+	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	ptel = rev->guest_rpte;
+	gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel));
+	memslot = builtin_gfn_to_memslot(kvm, gfn);
+	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
+		return;
+
+	rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
+	lock_rmap(rmap);
+
+	head = *rmap & KVMPPC_RMAP_INDEX;
+	next = real_vmalloc_addr(&kvm->arch.revmap[rev->forw]);
+	prev = real_vmalloc_addr(&kvm->arch.revmap[rev->back]);
+	next->back = rev->back;
+	prev->forw = rev->forw;
+	if (head == pte_index) {
+		head = rev->forw;
+		if (head == pte_index)
+			*rmap &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX);
+		else
+			*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) | head;
+	}
+	unlock_rmap(rmap);
+}
+
 long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 		    long pte_index, unsigned long pteh, unsigned long ptel)
 {
@@ -66,6 +130,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	struct kvm_memory_slot *memslot;
 	unsigned long *physp, pte_size;
 	unsigned long is_io;
+	unsigned long *rmap;
 	bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
 
 	psize = hpte_page_size(pteh, ptel);
@@ -83,6 +148,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (!slot_is_aligned(memslot, psize))
 		return H_PARAMETER;
 	slot_fn = gfn - memslot->base_gfn;
+	rmap = &memslot->rmap[slot_fn];
 
 	physp = kvm->arch.slot_phys[memslot->id];
 	if (!physp)
@@ -164,13 +230,25 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	}
 
 	/* Save away the guest's idea of the second HPTE dword */
-	rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+	rev = &kvm->arch.revmap[pte_index];
+	if (realmode)
+		rev = real_vmalloc_addr(rev);
 	if (rev)
 		rev->guest_rpte = g_ptel;
+
+	/* Link HPTE into reverse-map chain */
+	if (realmode)
+		rmap = real_vmalloc_addr(rmap);
+	lock_rmap(rmap);
+	kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index, realmode);
+
 	hpte[1] = ptel;
+
+	/* Write the first HPTE dword, unlocking the HPTE and making it valid */
 	eieio();
 	hpte[0] = pteh;
 	asm volatile("ptesync" : : : "memory");
+
 	vcpu->arch.gpr[4] = pte_index;
 	return H_SUCCESS;
 }
@@ -220,6 +298,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
 	vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK;
 	vcpu->arch.gpr[5] = r = hpte[1];
 	rb = compute_tlbie_rb(v, r, pte_index);
+	remove_revmap_chain(kvm, pte_index, v);
+	smp_wmb();
 	hpte[0] = 0;
 	if (!(flags & H_LOCAL)) {
 		while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
@@ -293,6 +373,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
 		flags |= (hp[1] >> 5) & 0x0c;
 		args[i * 2] = ((0x80 | flags) << 56) + pte_index;
 		tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+		remove_revmap_chain(kvm, pte_index, hp[0]);
+		smp_wmb();
 		hp[0] = 0;
 	}
 	if (n_inval == 0)
-- 
1.6.0.2

  parent reply	other threads:[~2012-01-13 14:31 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-01-13 14:31 [PULL 00/52] ppc patch queue 2012-01-13 Alexander Graf
2012-01-13 14:31 ` [PATCH 01/52] KVM: PPC: e500: don't translate gfn to pfn with preemption disabled Alexander Graf
2012-01-13 14:31 ` [PATCH 02/52] KVM: PPC: e500: Eliminate preempt_disable in local_sid_destroy_all Alexander Graf
2012-01-13 14:31 ` [PATCH 03/52] KVM: PPC: e500: clear up confusion between host and guest entries Alexander Graf
2012-01-13 14:31 ` [PATCH 04/52] KVM: PPC: e500: MMU API Alexander Graf
2012-01-13 14:31 ` [PATCH 05/52] KVM: PPC: e500: tlbsx: fix tlb0 esel Alexander Graf
2012-01-13 14:31 ` [PATCH 06/52] KVM: PPC: e500: Don't hardcode PIR=0 Alexander Graf
2012-01-13 14:31 ` [PATCH 07/52] KVM: PPC: E500: Support hugetlbfs Alexander Graf
2012-01-13 14:31 ` [PATCH 08/52] PPC: Fix race in mtmsr paravirt implementation Alexander Graf
2012-01-13 14:31 ` [PATCH 09/52] Fix DEC truncation for greater than 0xffff_ffff/1000 Alexander Graf
2012-01-13 14:31 ` [PATCH 10/52] KVM: booke: Do Not start decrementer when SPRN_DEC set 0 Alexander Graf
2012-01-13 14:31 ` [PATCH 11/52] KVM: PPC: booke: check for signals in kvmppc_vcpu_run Alexander Graf
2012-01-13 14:31 ` [PATCH 12/52] KVM: PPC: Rename deliver_interrupts to prepare_to_enter Alexander Graf
2012-01-13 14:31 ` [PATCH 13/52] KVM: PPC: Move prepare_to_enter call site into subarch code Alexander Graf
2012-01-13 14:31 ` [PATCH 14/52] KVM: PPC: booke: Check for MSR[WE] in prepare_to_enter Alexander Graf
2012-01-13 14:31 ` [PATCH 15/52] KVM: PPC: booke: Fix int_pending calculation for MSR[EE] paravirt Alexander Graf
2012-01-13 14:31 ` [PATCH 16/52] KVM: PPC: booke: Paravirtualize wrtee Alexander Graf
2012-01-13 14:31 ` [PATCH 17/52] KVM: PPC: Paravirtualize SPRG4-7, ESR, PIR, MASn Alexander Graf
2012-01-13 14:31 ` [PATCH 18/52] KVM: PPC: booke: Improve timer register emulation Alexander Graf
2012-01-13 14:31 ` [PATCH 19/52] KVM: PPC: Book3s: PR: Disable preemption in vcpu_run Alexander Graf
2012-01-13 14:31 ` [PATCH 20/52] KVM: PPC: Book3s: PR: No irq_disable " Alexander Graf
2012-01-13 14:31 ` [PATCH 21/52] KVM: PPC: Use get/set for to_svcpu to help preemption Alexander Graf
2012-01-13 14:31 ` [PATCH 22/52] KVM: PPC: align vcpu_kick with x86 Alexander Graf
2012-01-13 14:31 ` [PATCH 23/52] KVM: PPC: Book3S: PR: Fix signal check race Alexander Graf
2012-01-13 14:31 ` [PATCH 24/52] KVM: PPC: e500: Fix TLBnCFG in KVM_CONFIG_TLB Alexander Graf
2012-01-13 14:31 ` [PATCH 25/52] KVM: PPC: e500: use hardware hint when loading TLB0 entries Alexander Graf
2012-01-13 14:31 ` [PATCH 26/52] KVM: PPC: Avoid patching paravirt template code Alexander Graf
2012-01-13 14:31 ` [PATCH 27/52] KVM: PPC: Make wakeups work again for Book3S HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 28/52] KVM: PPC: Keep a record of HV guest view of hashed page table entries Alexander Graf
2012-01-13 14:31 ` [PATCH 29/52] KVM: PPC: Keep page physical addresses in per-slot arrays Alexander Graf
2012-01-13 14:31 ` [PATCH 30/52] KVM: PPC: Add an interface for pinning guest pages in Book3s HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 31/52] KVM: PPC: Make the H_ENTER hcall more reliable Alexander Graf
2012-01-13 14:31 ` [PATCH 32/52] KVM: PPC: Only get pages when actually needed, not in prepare_memory_region() Alexander Graf
2012-01-13 14:31 ` [PATCH 33/52] KVM: PPC: Allow use of small pages to back Book3S HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 34/52] KVM: PPC: Allow I/O mappings in memory slots Alexander Graf
2012-01-13 14:31 ` Alexander Graf [this message]
2012-01-13 14:31 ` [PATCH 36/52] KVM: PPC: Implement MMIO emulation support for Book3S HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 37/52] KVM: PPC: Implement MMU notifiers " Alexander Graf
2012-01-13 14:31 ` [PATCH 38/52] KVM: Add barriers to allow mmu_notifier_retry to be used locklessly Alexander Graf
2012-01-13 14:31 ` [PATCH 39/52] KVM: PPC: Allow for read-only pages backing a Book3S HV guest Alexander Graf
2012-01-13 14:31 ` [PATCH 40/52] KVM: PPC: Fix vcpu_create dereference before validity check Alexander Graf
2012-01-13 14:31 ` [PATCH 41/52] KVM: PPC: Add KVM_CAP_NR_VCPUS and KVM_CAP_MAX_VCPUS Alexander Graf
2012-01-13 14:31 ` [PATCH 42/52] KVM: PPC: Book3S HV: Keep HPTE locked when invalidating Alexander Graf
2012-01-13 14:31 ` [PATCH 43/52] KVM: PPC: Book3s HV: Maintain separate guest and host views of R and C bits Alexander Graf
2012-01-13 14:31 ` [PATCH 44/52] KVM: PPC: Book3S HV: Use the hardware referenced bit for kvm_age_hva Alexander Graf
2012-01-13 14:31 ` [PATCH 45/52] KVM: PPC: Book3s HV: Implement get_dirty_log using hardware changed bit Alexander Graf
2012-01-13 14:31 ` [PATCH 46/52] KVM: PPC: booke: Add booke206 TLB trace Alexander Graf
2012-01-13 14:31 ` [PATCH 47/52] KVM: PPC: Use the vcpu kmem_cache when allocating new VCPUs Alexander Graf
2012-01-13 14:31 ` [PATCH 48/52] KVM: PPC: Add generic single register ioctls Alexander Graf
2012-01-13 14:31 ` [PATCH 49/52] KVM: PPC: Add support for explicit HIOR setting Alexander Graf
2012-01-13 14:31 ` [PATCH 50/52] KVM: PPC: Move kvm_vcpu_ioctl_[gs]et_one_reg down to platform-specific code Alexander Graf
2012-01-13 14:31 ` [PATCH 51/52] KVM: PPC: Rename MMIO register identifiers Alexander Graf
2012-01-13 14:31 ` [PATCH 52/52] KVM: PPC: refer to paravirt docs in header file Alexander Graf
2012-01-16 10:55 ` [PULL 00/52] ppc patch queue 2012-01-13 Marcelo Tosatti

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1326465115-5976-36-git-send-email-agraf@suse.de \
    --to=agraf@suse.de \
    --cc=avi@redhat.com \
    --cc=kvm-ppc@vger.kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=mtosatti@redhat.com \
    --cc=paulus@samba.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox