From: Alexander Graf <agraf@suse.de>
To: kvm-ppc@vger.kernel.org
Cc: kvm list <kvm@vger.kernel.org>, Avi Kivity <avi@redhat.com>,
Marcelo Tosatti <mtosatti@redhat.com>,
Paul Mackerras <paulus@samba.org>
Subject: [PATCH 42/52] KVM: PPC: Book3S HV: Keep HPTE locked when invalidating
Date: Fri, 13 Jan 2012 15:31:45 +0100 [thread overview]
Message-ID: <1326465115-5976-43-git-send-email-agraf@suse.de> (raw)
In-Reply-To: <1326465115-5976-1-git-send-email-agraf@suse.de>
From: Paul Mackerras <paulus@samba.org>
This reworks the implementations of the H_REMOVE and H_BULK_REMOVE
hcalls to make sure that we keep the HPTE locked and in the reverse-
mapping chain until we have finished invalidating it. Previously
we would remove it from the chain and unlock it before invalidating
it, leaving a tiny window when the guest could access the page even
though we believe we have removed it from the guest (e.g.,
kvm_unmap_hva() has been called for the page and has found no HPTEs
in the chain). In addition, we'll need this for future patches where
we will need to read the R and C bits in the HPTE after invalidating
it.
Doing this required restructuring kvmppc_h_bulk_remove() substantially.
Since we want to batch up the tlbies, we now need to keep several
HPTEs locked simultaneously. In order to avoid possible deadlocks,
we don't spin on the HPTE bitlock for any except the first HPTE in
a batch. If we can't acquire the HPTE bitlock for the second or
subsequent HPTE, we terminate the batch at that point, do the tlbies
that we have accumulated so far, unlock those HPTEs, and then start
a new batch to do the remaining invalidations.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 212 ++++++++++++++++++++--------------
1 files changed, 125 insertions(+), 87 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index d3e36fc..ba4a137 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -140,6 +140,12 @@ static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
return kvmppc_read_update_linux_pte(ptep, writing);
}
+static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
+{
+ asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+ hpte[0] = hpte_v;
+}
+
long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel)
{
@@ -356,6 +362,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
struct kvm *kvm = vcpu->kvm;
unsigned long *hpte;
unsigned long v, r, rb;
+ struct revmap_entry *rev;
if (pte_index >= HPT_NPTE)
return H_PARAMETER;
@@ -368,30 +375,32 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
hpte[0] &= ~HPTE_V_HVLOCK;
return H_NOT_FOUND;
}
- if (atomic_read(&kvm->online_vcpus) == 1)
- flags |= H_LOCAL;
- vcpu->arch.gpr[4] = v = hpte[0] & ~HPTE_V_HVLOCK;
- vcpu->arch.gpr[5] = r = hpte[1];
- rb = compute_tlbie_rb(v, r, pte_index);
- if (v & HPTE_V_VALID)
+
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ v = hpte[0] & ~HPTE_V_HVLOCK;
+ if (v & HPTE_V_VALID) {
+ hpte[0] &= ~HPTE_V_VALID;
+ rb = compute_tlbie_rb(v, hpte[1], pte_index);
+ if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
+ while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
+ : : "r" (rb), "r" (kvm->arch.lpid));
+ asm volatile("ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ asm volatile("tlbiel %0" : : "r" (rb));
+ asm volatile("ptesync" : : : "memory");
+ }
remove_revmap_chain(kvm, pte_index, v);
- smp_wmb();
- hpte[0] = 0;
- if (!(v & HPTE_V_VALID))
- return H_SUCCESS;
- if (!(flags & H_LOCAL)) {
- while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- asm volatile("ptesync" : : : "memory");
- asm volatile(PPC_TLBIE(%1,%0)"; eieio; tlbsync"
- : : "r" (rb), "r" (kvm->arch.lpid));
- asm volatile("ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
- } else {
- asm volatile("ptesync" : : : "memory");
- asm volatile("tlbiel %0" : : "r" (rb));
- asm volatile("ptesync" : : : "memory");
}
+ r = rev->guest_rpte;
+ unlock_hpte(hpte, 0);
+
+ vcpu->arch.gpr[4] = v;
+ vcpu->arch.gpr[5] = r;
return H_SUCCESS;
}
@@ -399,82 +408,113 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
struct kvm *kvm = vcpu->kvm;
unsigned long *args = &vcpu->arch.gpr[4];
- unsigned long *hp, tlbrb[4];
- long int i, found;
- long int n_inval = 0;
- unsigned long flags, req, pte_index;
+ unsigned long *hp, *hptes[4], tlbrb[4];
+ long int i, j, k, n, found, indexes[4];
+ unsigned long flags, req, pte_index, rcbits;
long int local = 0;
long int ret = H_SUCCESS;
+ struct revmap_entry *rev, *revs[4];
if (atomic_read(&kvm->online_vcpus) == 1)
local = 1;
- for (i = 0; i < 4; ++i) {
- pte_index = args[i * 2];
- flags = pte_index >> 56;
- pte_index &= ((1ul << 56) - 1);
- req = flags >> 6;
- flags &= 3;
- if (req == 3)
- break;
- if (req != 1 || flags == 3 ||
- pte_index >= HPT_NPTE) {
- /* parameter error */
- args[i * 2] = ((0xa0 | flags) << 56) + pte_index;
- ret = H_PARAMETER;
- break;
- }
- hp = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
- while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
- cpu_relax();
- found = 0;
- if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
- switch (flags & 3) {
- case 0: /* absolute */
- found = 1;
+ for (i = 0; i < 4 && ret == H_SUCCESS; ) {
+ n = 0;
+ for (; i < 4; ++i) {
+ j = i * 2;
+ pte_index = args[j];
+ flags = pte_index >> 56;
+ pte_index &= ((1ul << 56) - 1);
+ req = flags >> 6;
+ flags &= 3;
+ if (req == 3) { /* no more requests */
+ i = 4;
break;
- case 1: /* andcond */
- if (!(hp[0] & args[i * 2 + 1]))
- found = 1;
+ }
+ if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
+ /* parameter error */
+ args[j] = ((0xa0 | flags) << 56) + pte_index;
+ ret = H_PARAMETER;
break;
- case 2: /* AVPN */
- if ((hp[0] & ~0x7fUL) == args[i * 2 + 1])
+ }
+ hp = (unsigned long *)
+ (kvm->arch.hpt_virt + (pte_index << 4));
+ /* to avoid deadlock, don't spin except for first */
+ if (!try_lock_hpte(hp, HPTE_V_HVLOCK)) {
+ if (n)
+ break;
+ while (!try_lock_hpte(hp, HPTE_V_HVLOCK))
+ cpu_relax();
+ }
+ found = 0;
+ if (hp[0] & (HPTE_V_ABSENT | HPTE_V_VALID)) {
+ switch (flags & 3) {
+ case 0: /* absolute */
found = 1;
- break;
+ break;
+ case 1: /* andcond */
+ if (!(hp[0] & args[j + 1]))
+ found = 1;
+ break;
+ case 2: /* AVPN */
+ if ((hp[0] & ~0x7fUL) == args[j + 1])
+ found = 1;
+ break;
+ }
+ }
+ if (!found) {
+ hp[0] &= ~HPTE_V_HVLOCK;
+ args[j] = ((0x90 | flags) << 56) + pte_index;
+ continue;
}
+
+ args[j] = ((0x80 | flags) << 56) + pte_index;
+ rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ /* insert R and C bits from guest PTE */
+ rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
+ args[j] |= rcbits << (56 - 5);
+
+ if (!(hp[0] & HPTE_V_VALID))
+ continue;
+
+ hp[0] &= ~HPTE_V_VALID; /* leave it locked */
+ tlbrb[n] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+ indexes[n] = j;
+ hptes[n] = hp;
+ revs[n] = rev;
+ ++n;
}
- if (!found) {
- hp[0] &= ~HPTE_V_HVLOCK;
- args[i * 2] = ((0x90 | flags) << 56) + pte_index;
- continue;
+
+ if (!n)
+ break;
+
+ /* Now that we've collected a batch, do the tlbies */
+ if (!local) {
+ while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
+ cpu_relax();
+ asm volatile("ptesync" : : : "memory");
+ for (k = 0; k < n; ++k)
+ asm volatile(PPC_TLBIE(%1,%0) : :
+ "r" (tlbrb[k]),
+ "r" (kvm->arch.lpid));
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+ kvm->arch.tlbie_lock = 0;
+ } else {
+ asm volatile("ptesync" : : : "memory");
+ for (k = 0; k < n; ++k)
+ asm volatile("tlbiel %0" : : "r" (tlbrb[k]));
+ asm volatile("ptesync" : : : "memory");
}
- /* insert R and C bits from PTE */
- flags |= (hp[1] >> 5) & 0x0c;
- args[i * 2] = ((0x80 | flags) << 56) + pte_index;
- if (hp[0] & HPTE_V_VALID) {
- tlbrb[n_inval++] = compute_tlbie_rb(hp[0], hp[1], pte_index);
+
+ for (k = 0; k < n; ++k) {
+ j = indexes[k];
+ pte_index = args[j] & ((1ul << 56) - 1);
+ hp = hptes[k];
+ rev = revs[k];
remove_revmap_chain(kvm, pte_index, hp[0]);
+ unlock_hpte(hp, 0);
}
- smp_wmb();
- hp[0] = 0;
- }
- if (n_inval == 0)
- return ret;
-
- if (!local) {
- while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- asm volatile("ptesync" : : : "memory");
- for (i = 0; i < n_inval; ++i)
- asm volatile(PPC_TLBIE(%1,%0)
- : : "r" (tlbrb[i]), "r" (kvm->arch.lpid));
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
- } else {
- asm volatile("ptesync" : : : "memory");
- for (i = 0; i < n_inval; ++i)
- asm volatile("tlbiel %0" : : "r" (tlbrb[i]));
- asm volatile("ptesync" : : : "memory");
}
+
return ret;
}
@@ -720,9 +760,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
gr = rev->guest_rpte;
- /* Unlock the HPTE */
- asm volatile("lwsync" : : : "memory");
- hpte[0] = v;
+ unlock_hpte(hpte, v);
/* For not found, if the HPTE is valid by now, retry the instruction */
if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
--
1.6.0.2
next prev parent reply other threads:[~2012-01-13 14:58 UTC|newest]
Thread overview: 54+ messages / expand[flat|nested] mbox.gz Atom feed top
2012-01-13 14:31 [PULL 00/52] ppc patch queue 2012-01-13 Alexander Graf
2012-01-13 14:31 ` [PATCH 01/52] KVM: PPC: e500: don't translate gfn to pfn with preemption disabled Alexander Graf
2012-01-13 14:31 ` [PATCH 02/52] KVM: PPC: e500: Eliminate preempt_disable in local_sid_destroy_all Alexander Graf
2012-01-13 14:31 ` [PATCH 03/52] KVM: PPC: e500: clear up confusion between host and guest entries Alexander Graf
2012-01-13 14:31 ` [PATCH 04/52] KVM: PPC: e500: MMU API Alexander Graf
2012-01-13 14:31 ` [PATCH 05/52] KVM: PPC: e500: tlbsx: fix tlb0 esel Alexander Graf
2012-01-13 14:31 ` [PATCH 06/52] KVM: PPC: e500: Don't hardcode PIR=0 Alexander Graf
2012-01-13 14:31 ` [PATCH 07/52] KVM: PPC: E500: Support hugetlbfs Alexander Graf
2012-01-13 14:31 ` [PATCH 08/52] PPC: Fix race in mtmsr paravirt implementation Alexander Graf
2012-01-13 14:31 ` [PATCH 09/52] Fix DEC truncation for greater than 0xffff_ffff/1000 Alexander Graf
2012-01-13 14:31 ` [PATCH 10/52] KVM: booke: Do Not start decrementer when SPRN_DEC set 0 Alexander Graf
2012-01-13 14:31 ` [PATCH 11/52] KVM: PPC: booke: check for signals in kvmppc_vcpu_run Alexander Graf
2012-01-13 14:31 ` [PATCH 12/52] KVM: PPC: Rename deliver_interrupts to prepare_to_enter Alexander Graf
2012-01-13 14:31 ` [PATCH 13/52] KVM: PPC: Move prepare_to_enter call site into subarch code Alexander Graf
2012-01-13 14:31 ` [PATCH 14/52] KVM: PPC: booke: Check for MSR[WE] in prepare_to_enter Alexander Graf
2012-01-13 14:31 ` [PATCH 15/52] KVM: PPC: booke: Fix int_pending calculation for MSR[EE] paravirt Alexander Graf
2012-01-13 14:31 ` [PATCH 16/52] KVM: PPC: booke: Paravirtualize wrtee Alexander Graf
2012-01-13 14:31 ` [PATCH 17/52] KVM: PPC: Paravirtualize SPRG4-7, ESR, PIR, MASn Alexander Graf
2012-01-13 14:31 ` [PATCH 18/52] KVM: PPC: booke: Improve timer register emulation Alexander Graf
2012-01-13 14:31 ` [PATCH 19/52] KVM: PPC: Book3s: PR: Disable preemption in vcpu_run Alexander Graf
2012-01-13 14:31 ` [PATCH 20/52] KVM: PPC: Book3s: PR: No irq_disable " Alexander Graf
2012-01-13 14:31 ` [PATCH 21/52] KVM: PPC: Use get/set for to_svcpu to help preemption Alexander Graf
2012-01-13 14:31 ` [PATCH 22/52] KVM: PPC: align vcpu_kick with x86 Alexander Graf
2012-01-13 14:31 ` [PATCH 23/52] KVM: PPC: Book3S: PR: Fix signal check race Alexander Graf
2012-01-13 14:31 ` [PATCH 24/52] KVM: PPC: e500: Fix TLBnCFG in KVM_CONFIG_TLB Alexander Graf
2012-01-13 14:31 ` [PATCH 25/52] KVM: PPC: e500: use hardware hint when loading TLB0 entries Alexander Graf
2012-01-13 14:31 ` [PATCH 26/52] KVM: PPC: Avoid patching paravirt template code Alexander Graf
2012-01-13 14:31 ` [PATCH 27/52] KVM: PPC: Make wakeups work again for Book3S HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 28/52] KVM: PPC: Keep a record of HV guest view of hashed page table entries Alexander Graf
2012-01-13 14:31 ` [PATCH 29/52] KVM: PPC: Keep page physical addresses in per-slot arrays Alexander Graf
2012-01-13 14:31 ` [PATCH 30/52] KVM: PPC: Add an interface for pinning guest pages in Book3s HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 31/52] KVM: PPC: Make the H_ENTER hcall more reliable Alexander Graf
2012-01-13 14:31 ` [PATCH 32/52] KVM: PPC: Only get pages when actually needed, not in prepare_memory_region() Alexander Graf
2012-01-13 14:31 ` [PATCH 33/52] KVM: PPC: Allow use of small pages to back Book3S HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 34/52] KVM: PPC: Allow I/O mappings in memory slots Alexander Graf
2012-01-13 14:31 ` [PATCH 35/52] KVM: PPC: Maintain a doubly-linked list of guest HPTEs for each gfn Alexander Graf
2012-01-13 14:31 ` [PATCH 36/52] KVM: PPC: Implement MMIO emulation support for Book3S HV guests Alexander Graf
2012-01-13 14:31 ` [PATCH 37/52] KVM: PPC: Implement MMU notifiers " Alexander Graf
2012-01-13 14:31 ` [PATCH 38/52] KVM: Add barriers to allow mmu_notifier_retry to be used locklessly Alexander Graf
2012-01-13 14:31 ` [PATCH 39/52] KVM: PPC: Allow for read-only pages backing a Book3S HV guest Alexander Graf
2012-01-13 14:31 ` [PATCH 40/52] KVM: PPC: Fix vcpu_create dereference before validity check Alexander Graf
2012-01-13 14:31 ` [PATCH 41/52] KVM: PPC: Add KVM_CAP_NR_VCPUS and KVM_CAP_MAX_VCPUS Alexander Graf
2012-01-13 14:31 ` Alexander Graf [this message]
2012-01-13 14:31 ` [PATCH 43/52] KVM: PPC: Book3s HV: Maintain separate guest and host views of R and C bits Alexander Graf
2012-01-13 14:31 ` [PATCH 44/52] KVM: PPC: Book3S HV: Use the hardware referenced bit for kvm_age_hva Alexander Graf
2012-01-13 14:31 ` [PATCH 45/52] KVM: PPC: Book3s HV: Implement get_dirty_log using hardware changed bit Alexander Graf
2012-01-13 14:31 ` [PATCH 46/52] KVM: PPC: booke: Add booke206 TLB trace Alexander Graf
2012-01-13 14:31 ` [PATCH 47/52] KVM: PPC: Use the vcpu kmem_cache when allocating new VCPUs Alexander Graf
2012-01-13 14:31 ` [PATCH 48/52] KVM: PPC: Add generic single register ioctls Alexander Graf
2012-01-13 14:31 ` [PATCH 49/52] KVM: PPC: Add support for explicit HIOR setting Alexander Graf
2012-01-13 14:31 ` [PATCH 50/52] KVM: PPC: Move kvm_vcpu_ioctl_[gs]et_one_reg down to platform-specific code Alexander Graf
2012-01-13 14:31 ` [PATCH 51/52] KVM: PPC: Rename MMIO register identifiers Alexander Graf
2012-01-13 14:31 ` [PATCH 52/52] KVM: PPC: refer to paravirt docs in header file Alexander Graf
2012-01-16 10:55 ` [PULL 00/52] ppc patch queue 2012-01-13 Marcelo Tosatti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1326465115-5976-43-git-send-email-agraf@suse.de \
--to=agraf@suse.de \
--cc=avi@redhat.com \
--cc=kvm-ppc@vger.kernel.org \
--cc=kvm@vger.kernel.org \
--cc=mtosatti@redhat.com \
--cc=paulus@samba.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox