From: Alexander Graf <agraf@suse.de>
To: "kvm@vger.kernel.org mailing list" <kvm@vger.kernel.org>
Cc: kvm-ppc@vger.kernel.org, Gleb Natapov <gleb@redhat.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Paul Mackerras <paulus@samba.org>
Subject: [PULL 24/51] KVM: PPC: Book3S PR: Use mmu_notifier_retry() in kvmppc_mmu_map_page()
Date: Thu, 31 Oct 2013 22:18:09 +0100 [thread overview]
Message-ID: <1383254316-11243-25-git-send-email-agraf@suse.de> (raw)
In-Reply-To: <1383254316-11243-1-git-send-email-agraf@suse.de>
From: Paul Mackerras <paulus@samba.org>
When the MM code is invalidating a range of pages, it calls the KVM
kvm_mmu_notifier_invalidate_range_start() notifier function, which calls
kvm_unmap_hva_range(), which arranges to flush all the existing host
HPTEs for guest pages. However, the Linux PTEs for the range being
flushed are still valid at that point. We are not supposed to establish
any new references to pages in the range until the ...range_end()
notifier gets called. The PPC-specific KVM code doesn't get any
explicit notification of that; instead, we are supposed to use
mmu_notifier_retry() to test whether we are or have been inside a
range flush notifier pair while we have been getting a page and
instantiating a host HPTE for the page.
This therefore adds a call to mmu_notifier_retry inside
kvmppc_mmu_map_page(). This call is inside a region locked with
kvm->mmu_lock, which is the same lock that is called by the KVM
MMU notifier functions, thus ensuring that no new notification can
proceed while we are in the locked region. Inside this region we
also create the host HPTE and link the corresponding hpte_cache
structure into the lists used to find it later. We cannot allocate
the hpte_cache structure inside this locked region because that can
lead to deadlock, so we allocate it outside the region and free it
if we end up not using it.
This also moves the updates of vcpu3s->hpte_cache_count inside the
regions locked with vcpu3s->mmu_lock, and does the increment in
kvmppc_mmu_hpte_cache_map() when the pte is added to the cache
rather than when it is allocated, in order that the hpte_cache_count
is accurate.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/kvm_book3s.h | 1 +
arch/powerpc/kvm/book3s_64_mmu_host.c | 37 ++++++++++++++++++++++++++---------
arch/powerpc/kvm/book3s_mmu_hpte.c | 14 +++++++++----
3 files changed, 39 insertions(+), 13 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index a07bd7e..0ec00f4 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -142,6 +142,7 @@ extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
+extern void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte);
extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu);
extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index cc9fb89..307e6e8 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -93,6 +93,13 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
int r = 0;
int hpsize = MMU_PAGE_4K;
bool writable;
+ unsigned long mmu_seq;
+ struct kvm *kvm = vcpu->kvm;
+ struct hpte_cache *cpte;
+
+ /* used to check for invalidations in progress */
+ mmu_seq = kvm->mmu_notifier_seq;
+ smp_rmb();
/* Get host physical address for gpa */
hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
@@ -143,6 +150,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M);
+ cpte = kvmppc_mmu_hpte_cache_next(vcpu);
+
+ spin_lock(&kvm->mmu_lock);
+ if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
+ r = -EAGAIN;
+ goto out_unlock;
+ }
+
map_again:
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
@@ -150,7 +165,7 @@ map_again:
if (attempt > 1)
if (ppc_md.hpte_remove(hpteg) < 0) {
r = -1;
- goto out;
+ goto out_unlock;
}
ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
@@ -163,8 +178,6 @@ map_again:
attempt++;
goto map_again;
} else {
- struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
-
trace_kvm_book3s_64_mmu_map(rflags, hpteg,
vpn, hpaddr, orig_pte);
@@ -175,15 +188,21 @@ map_again:
hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
}
- pte->slot = hpteg + (ret & 7);
- pte->host_vpn = vpn;
- pte->pte = *orig_pte;
- pte->pfn = hpaddr >> PAGE_SHIFT;
- pte->pagesize = hpsize;
+ cpte->slot = hpteg + (ret & 7);
+ cpte->host_vpn = vpn;
+ cpte->pte = *orig_pte;
+ cpte->pfn = hpaddr >> PAGE_SHIFT;
+ cpte->pagesize = hpsize;
- kvmppc_mmu_hpte_cache_map(vcpu, pte);
+ kvmppc_mmu_hpte_cache_map(vcpu, cpte);
+ cpte = NULL;
}
+
+out_unlock:
+ spin_unlock(&kvm->mmu_lock);
kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
+ if (cpte)
+ kvmppc_mmu_hpte_cache_free(cpte);
out:
return r;
diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
index d2d280b..6b79bfc 100644
--- a/arch/powerpc/kvm/book3s_mmu_hpte.c
+++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
@@ -98,6 +98,8 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
&vcpu3s->hpte_hash_vpte_64k[index]);
#endif
+ vcpu3s->hpte_cache_count++;
+
spin_unlock(&vcpu3s->mmu_lock);
}
@@ -131,10 +133,10 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
#ifdef CONFIG_PPC_BOOK3S_64
hlist_del_init_rcu(&pte->list_vpte_64k);
#endif
+ vcpu3s->hpte_cache_count--;
spin_unlock(&vcpu3s->mmu_lock);
- vcpu3s->hpte_cache_count--;
call_rcu(&pte->rcu_head, free_pte_rcu);
}
@@ -331,15 +333,19 @@ struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
struct hpte_cache *pte;
- pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
- vcpu3s->hpte_cache_count++;
-
if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)
kvmppc_mmu_pte_flush_all(vcpu);
+ pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
+
return pte;
}
+void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte)
+{
+ kmem_cache_free(hpte_cache, pte);
+}
+
void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)
{
kvmppc_mmu_pte_flush(vcpu, 0, 0);
--
1.8.1.4
next prev parent reply other threads:[~2013-10-31 21:18 UTC|newest]
Thread overview: 57+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-10-31 21:17 [PULL 00/51] ppc patch queue 2013-10-31 Alexander Graf
2013-10-31 21:17 ` [PULL 01/51] KVM: PPC: Book3S HV: Reserve POWER8 space in get/set_one_reg Alexander Graf
2013-10-31 21:17 ` [PULL 02/51] KVM: PPC: Book3S HV: Save/restore SIAR and SDAR along with other PMU registers Alexander Graf
2013-10-31 21:17 ` [PULL 03/51] KVM: PPC: Book3S HV: Implement timebase offset for guests Alexander Graf
2013-10-31 21:17 ` [PULL 04/51] KVM: PPC: Book3S: Add GET/SET_ONE_REG interface for VRSAVE Alexander Graf
2013-10-31 21:17 ` [PULL 05/51] KVM: PPC: Book3S HV: Implement H_CONFER Alexander Graf
2013-10-31 21:17 ` [PULL 06/51] KVM: PPC: Book3S HV: Restructure kvmppc_hv_entry to be a subroutine Alexander Graf
2013-10-31 21:17 ` [PULL 07/51] KVM: PPC: Book3S HV: Pull out interrupt-reading code into " Alexander Graf
2013-10-31 21:17 ` [PULL 08/51] KVM: PPC: Book3S HV: Avoid unbalanced increments of VPA yield count Alexander Graf
2013-10-31 21:17 ` [PULL 09/51] KVM: PPC: BookE: Add GET/SET_ONE_REG interface for VRSAVE Alexander Graf
2013-10-31 21:17 ` [PULL 10/51] KVM: PPC: Book3S HV: Store LPCR value for each virtual core Alexander Graf
2013-10-31 21:17 ` [PULL 11/51] KVM: PPC: Book3S HV: Add support for guest Program Priority Register Alexander Graf
2013-10-31 21:17 ` [PULL 12/51] KVM: PPC: Book3S HV: Support POWER6 compatibility mode on POWER7 Alexander Graf
2013-10-31 21:17 ` [PULL 13/51] KVM: PPC: Book3S HV: Don't crash host on unknown guest interrupt Alexander Graf
2013-10-31 21:17 ` [PULL 14/51] KVM: PPC: Book3S PR: Fix compilation without CONFIG_ALTIVEC Alexander Graf
2013-10-31 21:18 ` [PULL 15/51] KVM: PPC: Book3S PR: Keep volatile reg values in vcpu rather than shadow_vcpu Alexander Graf
2013-10-31 21:18 ` [PULL 16/51] KVM: PPC: Book3S PR: Allow guest to use 64k pages Alexander Graf
2013-10-31 21:18 ` [PULL 17/51] KVM: PPC: Book3S PR: Use 64k host pages where possible Alexander Graf
2013-10-31 21:18 ` [PULL 18/51] KVM: PPC: Book3S PR: Handle PP0 page-protection bit in guest HPTEs Alexander Graf
2013-10-31 21:18 ` [PULL 19/51] KVM: PPC: Book3S PR: Correct errors in H_ENTER implementation Alexander Graf
2013-10-31 21:18 ` [PULL 20/51] KVM: PPC: Book3S PR: Make HPT accesses and updates SMP-safe Alexander Graf
2013-10-31 21:18 ` [PULL 21/51] KVM: PPC: Book3S PR: Allocate kvm_vcpu structs from kvm_vcpu_cache Alexander Graf
2013-10-31 21:18 ` [PULL 22/51] KVM: PPC: Book3S: Move skip-interrupt handlers to common code Alexander Graf
2013-10-31 21:18 ` [PULL 23/51] KVM: PPC: Book3S PR: Better handling of host-side read-only pages Alexander Graf
2013-10-31 21:18 ` Alexander Graf [this message]
2013-10-31 21:18 ` [PULL 25/51] KVM: PPC: Book3S PR: Mark pages accessed, and dirty if being written Alexander Graf
2013-10-31 21:18 ` [PULL 26/51] KVM: PPC: Book3S PR: Reduce number of shadow PTEs invalidated by MMU notifiers Alexander Graf
2013-10-31 21:18 ` [PULL 27/51] kvm: powerpc: book3s hv: Fix vcore leak Alexander Graf
2013-10-31 21:18 ` [PULL 28/51] KVM: PPC: Book3S HV: Better handling of exceptions that happen in real mode Alexander Graf
2013-10-31 21:18 ` [PULL 29/51] powerpc: book3e: _PAGE_LENDIAN must be _PAGE_ENDIAN Alexander Graf
2013-10-31 21:18 ` [PULL 30/51] kvm: powerpc: allow guest control "E" attribute in mas2 Alexander Graf
2013-10-31 21:18 ` [PULL 31/51] kvm: powerpc: allow guest control "G" " Alexander Graf
2013-10-31 21:18 ` [PULL 32/51] kvm: powerpc: e500: mark page accessed when mapping a guest page Alexander Graf
2013-10-31 21:18 ` [PULL 33/51] powerpc: remove unnecessary line continuations Alexander Graf
2013-10-31 21:18 ` [PULL 34/51] powerpc: move debug registers in a structure Alexander Graf
2013-11-03 14:30 ` Gleb Natapov
2013-11-03 20:56 ` Benjamin Herrenschmidt
2013-11-04 0:03 ` Scott Wood
2013-11-04 6:43 ` Alexander Graf
2013-11-04 6:51 ` Benjamin Herrenschmidt
2013-10-31 21:18 ` [PULL 35/51] powerpc: export debug registers save function for KVM Alexander Graf
2013-10-31 21:18 ` [PULL 36/51] KVM: PPC: E500: exit to user space on "ehpriv 1" instruction Alexander Graf
2013-10-31 21:18 ` [PULL 37/51] KVM: PPC: E500: Using "struct debug_reg" Alexander Graf
2013-10-31 21:18 ` [PULL 38/51] KVM: PPC: E500: Add userspace debug stub support Alexander Graf
2013-10-31 21:18 ` [PULL 39/51] kvm: powerpc: book3s: remove kvmppc_handler_highmem label Alexander Graf
2013-10-31 21:18 ` [PULL 40/51] kvm: powerpc: book3s: move book3s_64_vio_hv.c into the main kernel binary Alexander Graf
2013-10-31 21:18 ` [PULL 41/51] kvm: powerpc: book3s: pr: Rename KVM_BOOK3S_PR to KVM_BOOK3S_PR_POSSIBLE Alexander Graf
2013-10-31 21:18 ` [PULL 42/51] kvm: powerpc: book3s: Add a new config variable CONFIG_KVM_BOOK3S_HV_POSSIBLE Alexander Graf
2013-10-31 21:18 ` [PULL 43/51] kvm: powerpc: Add kvmppc_ops callback Alexander Graf
2013-10-31 21:18 ` [PULL 44/51] kvm: powerpc: book3s: Cleanup interrupt handling code Alexander Graf
2013-10-31 21:18 ` [PULL 45/51] kvm: powerpc: book3s: Add is_hv_enabled to kvmppc_ops Alexander Graf
2013-10-31 21:18 ` [PULL 46/51] kvm: powerpc: book3s: pr: move PR related tracepoints to a separate header Alexander Graf
2013-10-31 21:18 ` [PULL 47/51] kvm: powerpc: booke: Move booke related tracepoints to " Alexander Graf
2013-10-31 21:18 ` [PULL 48/51] kvm: powerpc: book3s: Support building HV and PR KVM as module Alexander Graf
2013-10-31 21:18 ` [PULL 49/51] kvm: Add struct kvm arg to memslot APIs Alexander Graf
2013-10-31 21:18 ` [PULL 50/51] kvm: powerpc: book3s: Allow the HV and PR selection per virtual machine Alexander Graf
2013-10-31 21:18 ` [PULL 51/51] kvm: powerpc: book3s: drop is_hv_enabled Alexander Graf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1383254316-11243-25-git-send-email-agraf@suse.de \
--to=agraf@suse.de \
--cc=gleb@redhat.com \
--cc=kvm-ppc@vger.kernel.org \
--cc=kvm@vger.kernel.org \
--cc=paulus@samba.org \
--cc=pbonzini@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox