* [PATCH 01/28] KVM: Distangle eventfd code from irqchip
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
@ 2012-12-06 0:40 ` Alexander Graf
2012-12-06 0:40 ` [PATCH 02/28] KVM: PPC: Support eventfd Alexander Graf
` (27 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:40 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list
The current eventfd code assumes that when we have eventfd, we also have
irqfd for in-kernel interrupt delivery. This is not necessarily true. On
PPC we don't have an in-kernel irqchip yet, but we can still support easily
support eventfd.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
include/linux/kvm_host.h | 12 +++++++++++-
virt/kvm/eventfd.c | 6 ++++++
2 files changed, 17 insertions(+), 1 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8e5c7b6..c823e47 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -900,10 +900,20 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
#ifdef CONFIG_HAVE_KVM_EVENTFD
void kvm_eventfd_init(struct kvm *kvm);
+int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
+
+#ifdef CONFIG_HAVE_KVM_IRQCHIP
int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args);
void kvm_irqfd_release(struct kvm *kvm);
void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
-int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
+#else
+static inline int kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
+{
+ return -EINVAL;
+}
+
+static inline void kvm_irqfd_release(struct kvm *kvm) {}
+#endif
#else
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 9718e98..d7424c8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -35,6 +35,7 @@
#include "iodev.h"
+#ifdef __KVM_HAVE_IOAPIC
/*
* --------------------------------------------------------------------
* irqfd: Allows an fd to be used to inject an interrupt to the guest
@@ -425,17 +426,21 @@ fail:
kfree(irqfd);
return ret;
}
+#endif
void
kvm_eventfd_init(struct kvm *kvm)
{
+#ifdef __KVM_HAVE_IOAPIC
spin_lock_init(&kvm->irqfds.lock);
INIT_LIST_HEAD(&kvm->irqfds.items);
INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
mutex_init(&kvm->irqfds.resampler_lock);
+#endif
INIT_LIST_HEAD(&kvm->ioeventfds);
}
+#ifdef __KVM_HAVE_IOAPIC
/*
* shutdown any irqfd's that match fd+gsi
*/
@@ -555,6 +560,7 @@ static void __exit irqfd_module_exit(void)
module_init(irqfd_module_init);
module_exit(irqfd_module_exit);
+#endif
/*
* --------------------------------------------------------------------
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 02/28] KVM: PPC: Support eventfd
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
2012-12-06 0:40 ` [PATCH 01/28] KVM: Distangle eventfd code from irqchip Alexander Graf
@ 2012-12-06 0:40 ` Alexander Graf
2012-12-06 0:40 ` [PATCH 03/28] KVM: PPC: Book3S HV: Restructure HPT entry creation code Alexander Graf
` (26 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:40 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list
In order to support the generic eventfd infrastructure on PPC, we need
to call into the generic KVM in-kernel device mmio code.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/Kconfig | 1 +
arch/powerpc/kvm/Makefile | 4 +++-
arch/powerpc/kvm/powerpc.c | 17 ++++++++++++++++-
3 files changed, 20 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 71f0cd9..4730c95 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -20,6 +20,7 @@ config KVM
bool
select PREEMPT_NOTIFIERS
select ANON_INODES
+ select HAVE_KVM_EVENTFD
config KVM_BOOK3S_HANDLER
bool
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index c2a0863..cd89658 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -6,7 +6,8 @@ subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
-common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \
+ eventfd.o)
CFLAGS_44x_tlb.o := -I.
CFLAGS_e500_tlb.o := -I.
@@ -76,6 +77,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
kvm-book3s_64-module-objs := \
../../../virt/kvm/kvm_main.o \
+ ../../../virt/kvm/eventfd.o \
powerpc.o \
emulate.o \
book3s.o \
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index f9ab12a..d583ea1 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -314,6 +314,7 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_PPC_IRQ_LEVEL:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_ONE_REG:
+ case KVM_CAP_IOEVENTFD:
r = 1;
break;
#ifndef CONFIG_KVM_BOOK3S_64_HV
@@ -618,6 +619,13 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->mmio_is_write = 0;
vcpu->arch.mmio_sign_extend = 0;
+ if (!kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ bytes, &run->mmio.data)) {
+ kvmppc_complete_mmio_load(vcpu, run);
+ vcpu->mmio_needed = 0;
+ return EMULATE_DONE;
+ }
+
return EMULATE_DO_MMIO;
}
@@ -627,8 +635,8 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
int r;
- r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
vcpu->arch.mmio_sign_extend = 1;
+ r = kvmppc_handle_load(run, vcpu, rt, bytes, is_bigendian);
return r;
}
@@ -666,6 +674,13 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
}
+ if (!kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ bytes, &run->mmio.data)) {
+ kvmppc_complete_mmio_load(vcpu, run);
+ vcpu->mmio_needed = 0;
+ return EMULATE_DONE;
+ }
+
return EMULATE_DO_MMIO;
}
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 03/28] KVM: PPC: Book3S HV: Restructure HPT entry creation code
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
2012-12-06 0:40 ` [PATCH 01/28] KVM: Distangle eventfd code from irqchip Alexander Graf
2012-12-06 0:40 ` [PATCH 02/28] KVM: PPC: Support eventfd Alexander Graf
@ 2012-12-06 0:40 ` Alexander Graf
2012-12-06 0:40 ` [PATCH 04/28] KVM: PPC: Book3S HV: Fix bug causing loss of page dirty state Alexander Graf
` (25 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:40 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
This restructures the code that creates HPT (hashed page table)
entries so that it can be called in situations where we don't have a
struct vcpu pointer, only a struct kvm pointer. It also fixes a bug
where kvmppc_map_vrma() would corrupt the guest R4 value.
Most of the work of kvmppc_virtmode_h_enter is now done by a new
function, kvmppc_virtmode_do_h_enter, which itself calls another new
function, kvmppc_do_h_enter, which contains most of the old
kvmppc_h_enter. The new kvmppc_do_h_enter takes explicit arguments
for the place to return the HPTE index, the Linux page tables to use,
and whether it is being called in real mode, thus removing the need
for it to have the vcpu as an argument.
Currently kvmppc_map_vrma creates the VRMA (virtual real mode area)
HPTEs by calling kvmppc_virtmode_h_enter, which is designed primarily
to handle H_ENTER hcalls from the guest that need to pin a page of
memory. Since H_ENTER returns the index of the created HPTE in R4,
kvmppc_virtmode_h_enter updates the guest R4, corrupting the guest R4
in the case when it gets called from kvmppc_map_vrma on the first
VCPU_RUN ioctl. With this, kvmppc_map_vrma instead calls
kvmppc_virtmode_do_h_enter with the address of a dummy word as the
place to store the HPTE index, thus avoiding corrupting the guest R4.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/kvm_book3s.h | 5 ++-
arch/powerpc/kvm/book3s_64_mmu_hv.c | 36 ++++++++++++++++++++++----------
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 27 +++++++++++++++---------
3 files changed, 45 insertions(+), 23 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 36fcf41..fea768f 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -157,8 +157,9 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr);
extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel);
-extern long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh, unsigned long ptel);
+extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel,
+ pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot, unsigned long *map);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 2a89a36..6ee6516 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -41,6 +41,10 @@
/* Power architecture requires HPT is at least 256kB */
#define PPC_MIN_HPT_ORDER 18
+static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel, unsigned long *pte_idx_ret);
+
long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
unsigned long hpt;
@@ -185,6 +189,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
unsigned long addr, hash;
unsigned long psize;
unsigned long hp0, hp1;
+ unsigned long idx_ret;
long ret;
struct kvm *kvm = vcpu->kvm;
@@ -216,7 +221,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
hash = (hash << 3) + 7;
hp_v = hp0 | ((addr >> 16) & ~0x7fUL);
hp_r = hp1 | addr;
- ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r);
+ ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, hash, hp_v, hp_r,
+ &idx_ret);
if (ret != H_SUCCESS) {
pr_err("KVM: map_vrma at %lx failed, ret=%ld\n",
addr, ret);
@@ -354,15 +360,10 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
return err;
}
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh, unsigned long ptel)
+long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel, unsigned long *pte_idx_ret)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long psize, gpa, gfn;
struct kvm_memory_slot *memslot;
long ret;
@@ -390,8 +391,8 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
do_insert:
/* Protect linux PTE lookup from page table destruction */
rcu_read_lock_sched(); /* this disables preemption too */
- vcpu->arch.pgdir = current->mm->pgd;
- ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel);
+ ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
+ current->mm->pgd, false, pte_idx_ret);
rcu_read_unlock_sched();
if (ret == H_TOO_HARD) {
/* this can't happen */
@@ -402,6 +403,19 @@ long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
}
+/*
+ * We come here on a H_ENTER call from the guest when we are not
+ * using mmu notifiers and we don't have the requested page pinned
+ * already.
+ */
+long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh,
+ unsigned long ptel)
+{
+ return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
+ pteh, ptel, &vcpu->arch.gpr[4]);
+}
+
static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
gva_t eaddr)
{
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 5e06e31..362dffe 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -103,14 +103,14 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
unlock_rmap(rmap);
}
-static pte_t lookup_linux_pte(struct kvm_vcpu *vcpu, unsigned long hva,
+static pte_t lookup_linux_pte(pgd_t *pgdir, unsigned long hva,
int writing, unsigned long *pte_sizep)
{
pte_t *ptep;
unsigned long ps = *pte_sizep;
unsigned int shift;
- ptep = find_linux_pte_or_hugepte(vcpu->arch.pgdir, hva, &shift);
+ ptep = find_linux_pte_or_hugepte(pgdir, hva, &shift);
if (!ptep)
return __pte(0);
if (shift)
@@ -130,10 +130,10 @@ static inline void unlock_hpte(unsigned long *hpte, unsigned long hpte_v)
hpte[0] = hpte_v;
}
-long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh, unsigned long ptel)
+long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel,
+ pgd_t *pgdir, bool realmode, unsigned long *pte_idx_ret)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long i, pa, gpa, gfn, psize;
unsigned long slot_fn, hva;
unsigned long *hpte;
@@ -147,7 +147,6 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned int writing;
unsigned long mmu_seq;
unsigned long rcbits;
- bool realmode = vcpu->arch.vcore->vcore_state == VCORE_RUNNING;
psize = hpte_page_size(pteh, ptel);
if (!psize)
@@ -201,7 +200,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
/* Look up the Linux PTE for the backing page */
pte_size = psize;
- pte = lookup_linux_pte(vcpu, hva, writing, &pte_size);
+ pte = lookup_linux_pte(pgdir, hva, writing, &pte_size);
if (pte_present(pte)) {
if (writing && !pte_write(pte))
/* make the actual HPTE be read-only */
@@ -210,6 +209,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
pa = pte_pfn(pte) << PAGE_SHIFT;
}
}
+
if (pte_size < psize)
return H_PARAMETER;
if (pa && pte_size > psize)
@@ -297,7 +297,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
lock_rmap(rmap);
/* Check for pending invalidations under the rmap chain lock */
if (kvm->arch.using_mmu_notifiers &&
- mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
+ mmu_notifier_retry(kvm, mmu_seq)) {
/* inval in progress, write a non-present HPTE */
pteh |= HPTE_V_ABSENT;
pteh &= ~HPTE_V_VALID;
@@ -318,10 +318,17 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
hpte[0] = pteh;
asm volatile("ptesync" : : : "memory");
- vcpu->arch.gpr[4] = pte_index;
+ *pte_idx_ret = pte_index;
return H_SUCCESS;
}
-EXPORT_SYMBOL_GPL(kvmppc_h_enter);
+EXPORT_SYMBOL_GPL(kvmppc_do_h_enter);
+
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel)
+{
+ return kvmppc_do_h_enter(vcpu->kvm, flags, pte_index, pteh, ptel,
+ vcpu->arch.pgdir, true, &vcpu->arch.gpr[4]);
+}
#define LOCK_TOKEN (*(u32 *)(&get_paca()->lock_token))
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 04/28] KVM: PPC: Book3S HV: Fix bug causing loss of page dirty state
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (2 preceding siblings ...)
2012-12-06 0:40 ` [PATCH 03/28] KVM: PPC: Book3S HV: Restructure HPT entry creation code Alexander Graf
@ 2012-12-06 0:40 ` Alexander Graf
2012-12-06 0:40 ` [PATCH 05/28] KVM: PPC: Book3S HV: Add a mechanism for recording modified HPTEs Alexander Graf
` (24 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:40 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
This fixes a bug where adding a new guest HPT entry via the H_ENTER
hcall would lose the "changed" bit in the reverse map information
for the guest physical page being mapped. The result was that the
KVM_GET_DIRTY_LOG could return a zero bit for the page even though
the page had been modified by the guest.
This fixes it by only modifying the index and present bits in the
reverse map entry, thus preserving the reference and change bits.
We were also unnecessarily setting the reference bit, and this
fixes that too.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 6 +++---
1 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 362dffe..ff2da5c 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -59,10 +59,10 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
head->back = pte_index;
} else {
rev->forw = rev->back = pte_index;
- i = pte_index;
+ *rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
+ pte_index | KVMPPC_RMAP_PRESENT;
}
- smp_wmb();
- *rmap = i | KVMPPC_RMAP_REFERENCED | KVMPPC_RMAP_PRESENT; /* unlock */
+ unlock_rmap(rmap);
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 05/28] KVM: PPC: Book3S HV: Add a mechanism for recording modified HPTEs
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (3 preceding siblings ...)
2012-12-06 0:40 ` [PATCH 04/28] KVM: PPC: Book3S HV: Fix bug causing loss of page dirty state Alexander Graf
@ 2012-12-06 0:40 ` Alexander Graf
2012-12-06 0:40 ` [PATCH 06/28] KVM: PPC: Book3S HV: Make a HPTE removal function available Alexander Graf
` (23 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:40 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
This uses a bit in our record of the guest view of the HPTE to record
when the HPTE gets modified. We use a reserved bit for this, and ensure
that this bit is always cleared in HPTE values returned to the guest.
The recording of modified HPTEs is only done if other code indicates
its interest by setting kvm->arch.hpte_mod_interest to a non-zero value.
The reason for this is that when later commits add facilities for
userspace to read the HPT, the first pass of reading the HPT will be
quicker if there are no (or very few) HPTEs marked as modified,
rather than having most HPTEs marked as modified.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/kvm_book3s_64.h | 9 +++++++++
arch/powerpc/include/asm/kvm_host.h | 1 +
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 28 ++++++++++++++++++++++++----
3 files changed, 34 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 1472a5b..b322e5b 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -50,6 +50,15 @@ extern int kvm_hpt_order; /* order of preallocated HPTs */
#define HPTE_V_HVLOCK 0x40UL
#define HPTE_V_ABSENT 0x20UL
+/*
+ * We use this bit in the guest_rpte field of the revmap entry
+ * to indicate a modified HPTE.
+ */
+#define HPTE_GR_MODIFIED (1ul << 62)
+
+/* These bits are reserved in the guest view of the HPTE */
+#define HPTE_GR_RESERVED HPTE_GR_MODIFIED
+
static inline long try_lock_hpte(unsigned long *hpte, unsigned long bits)
{
unsigned long tmp, old;
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 3093896..58c7264 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -248,6 +248,7 @@ struct kvm_arch {
atomic_t vcpus_running;
unsigned long hpt_npte;
unsigned long hpt_mask;
+ atomic_t hpte_mod_interest;
spinlock_t slot_phys_lock;
unsigned short last_vcpu[NR_CPUS];
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index ff2da5c..ed563a5 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -66,6 +66,17 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
}
EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain);
+/*
+ * Note modification of an HPTE; set the HPTE modified bit
+ * if anyone is interested.
+ */
+static inline void note_hpte_modification(struct kvm *kvm,
+ struct revmap_entry *rev)
+{
+ if (atomic_read(&kvm->arch.hpte_mod_interest))
+ rev->guest_rpte |= HPTE_GR_MODIFIED;
+}
+
/* Remove this HPTE from the chain for a real page */
static void remove_revmap_chain(struct kvm *kvm, long pte_index,
struct revmap_entry *rev,
@@ -138,7 +149,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
unsigned long slot_fn, hva;
unsigned long *hpte;
struct revmap_entry *rev;
- unsigned long g_ptel = ptel;
+ unsigned long g_ptel;
struct kvm_memory_slot *memslot;
unsigned long *physp, pte_size;
unsigned long is_io;
@@ -153,6 +164,8 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
return H_PARAMETER;
writing = hpte_is_writable(ptel);
pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
+ ptel &= ~HPTE_GR_RESERVED;
+ g_ptel = ptel;
/* used later to detect if we might have been invalidated */
mmu_seq = kvm->mmu_notifier_seq;
@@ -287,8 +300,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
rev = &kvm->arch.revmap[pte_index];
if (realmode)
rev = real_vmalloc_addr(rev);
- if (rev)
+ if (rev) {
rev->guest_rpte = g_ptel;
+ note_hpte_modification(kvm, rev);
+ }
/* Link HPTE into reverse-map chain */
if (pteh & HPTE_V_VALID) {
@@ -392,7 +407,8 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
/* Read PTE low word after tlbie to get final R/C values */
remove_revmap_chain(kvm, pte_index, rev, v, hpte[1]);
}
- r = rev->guest_rpte;
+ r = rev->guest_rpte & ~HPTE_GR_RESERVED;
+ note_hpte_modification(kvm, rev);
unlock_hpte(hpte, 0);
vcpu->arch.gpr[4] = v;
@@ -466,6 +482,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
args[j] = ((0x80 | flags) << 56) + pte_index;
rev = real_vmalloc_addr(&kvm->arch.revmap[pte_index]);
+ note_hpte_modification(kvm, rev);
if (!(hp[0] & HPTE_V_VALID)) {
/* insert R and C bits from PTE */
@@ -555,6 +572,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
if (rev) {
r = (rev->guest_rpte & ~mask) | bits;
rev->guest_rpte = r;
+ note_hpte_modification(kvm, rev);
}
r = (hpte[1] & ~mask) | bits;
@@ -606,8 +624,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
}
- if (v & HPTE_V_VALID)
+ if (v & HPTE_V_VALID) {
r = rev[i].guest_rpte | (r & (HPTE_R_R | HPTE_R_C));
+ r &= ~HPTE_GR_RESERVED;
+ }
vcpu->arch.gpr[4 + i * 2] = v;
vcpu->arch.gpr[5 + i * 2] = r;
}
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 06/28] KVM: PPC: Book3S HV: Make a HPTE removal function available
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (4 preceding siblings ...)
2012-12-06 0:40 ` [PATCH 05/28] KVM: PPC: Book3S HV: Add a mechanism for recording modified HPTEs Alexander Graf
@ 2012-12-06 0:40 ` Alexander Graf
2012-12-06 0:40 ` [PATCH 07/28] KVM: PPC: Book3S HV: Provide a method for userspace to read and write the HPT Alexander Graf
` (22 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:40 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
This makes a HPTE removal function, kvmppc_do_h_remove(), available
outside book3s_hv_rm_mmu.c. This will be used by the HPT writing
code.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/kvm_book3s.h | 3 +++
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 19 +++++++++++++------
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index fea768f..46763d1 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -160,6 +160,9 @@ extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel,
pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
+extern long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long *hpret);
extern long kvmppc_hv_get_dirty_log(struct kvm *kvm,
struct kvm_memory_slot *memslot, unsigned long *map);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index ed563a5..fc3da32 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -365,11 +365,10 @@ static inline int try_lock_tlbie(unsigned int *lock)
return old == 0;
}
-long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
- unsigned long pte_index, unsigned long avpn,
- unsigned long va)
+long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long *hpret)
{
- struct kvm *kvm = vcpu->kvm;
unsigned long *hpte;
unsigned long v, r, rb;
struct revmap_entry *rev;
@@ -411,10 +410,18 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
note_hpte_modification(kvm, rev);
unlock_hpte(hpte, 0);
- vcpu->arch.gpr[4] = v;
- vcpu->arch.gpr[5] = r;
+ hpret[0] = v;
+ hpret[1] = r;
return H_SUCCESS;
}
+EXPORT_SYMBOL_GPL(kvmppc_do_h_remove);
+
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn)
+{
+ return kvmppc_do_h_remove(vcpu->kvm, flags, pte_index, avpn,
+ &vcpu->arch.gpr[4]);
+}
long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
{
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 07/28] KVM: PPC: Book3S HV: Provide a method for userspace to read and write the HPT
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (5 preceding siblings ...)
2012-12-06 0:40 ` [PATCH 06/28] KVM: PPC: Book3S HV: Make a HPTE removal function available Alexander Graf
@ 2012-12-06 0:40 ` Alexander Graf
2012-12-06 0:40 ` [PATCH 08/28] KVM: PPC: Book3S HV: Reset reverse-map chains when resetting " Alexander Graf
` (21 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:40 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
A new ioctl, KVM_PPC_GET_HTAB_FD, returns a file descriptor. Reads on
this fd return the contents of the HPT (hashed page table), writes
create and/or remove entries in the HPT. There is a new capability,
KVM_CAP_PPC_HTAB_FD, to indicate the presence of the ioctl. The ioctl
takes an argument structure with the index of the first HPT entry to
read out and a set of flags. The flags indicate whether the user is
intending to read or write the HPT, and whether to return all entries
or only the "bolted" entries (those with the bolted bit, 0x10, set in
the first doubleword).
This is intended for use in implementing qemu's savevm/loadvm and for
live migration. Therefore, on reads, the first pass returns information
about all HPTEs (or all bolted HPTEs). When the first pass reaches the
end of the HPT, it returns from the read. Subsequent reads only return
information about HPTEs that have changed since they were last read.
A read that finds no changed HPTEs in the HPT following where the last
read finished will return 0 bytes.
The format of the data provides a simple run-length compression of the
invalid entries. Each block of data starts with a header that indicates
the index (position in the HPT, which is just an array), the number of
valid entries starting at that index (may be zero), and the number of
invalid entries following those valid entries. The valid entries, 16
bytes each, follow the header. The invalid entries are not explicitly
represented.
Signed-off-by: Paul Mackerras <paulus@samba.org>
[agraf: fix documentation]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
Documentation/virtual/kvm/api.txt | 54 +++++
arch/powerpc/include/asm/kvm_book3s_64.h | 22 ++
arch/powerpc/include/asm/kvm_ppc.h | 2 +
arch/powerpc/include/uapi/asm/kvm.h | 25 +++
arch/powerpc/kvm/book3s_64_mmu_hv.c | 344 ++++++++++++++++++++++++++++++
arch/powerpc/kvm/book3s_hv.c | 12 -
arch/powerpc/kvm/powerpc.c | 17 ++
include/uapi/linux/kvm.h | 3 +
8 files changed, 467 insertions(+), 12 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 6671fdc..a5607c5 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2071,6 +2071,60 @@ KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm
Note that the vcpu ioctl is asynchronous to vcpu execution.
+4.78 KVM_PPC_GET_HTAB_FD
+
+Capability: KVM_CAP_PPC_HTAB_FD
+Architectures: powerpc
+Type: vm ioctl
+Parameters: Pointer to struct kvm_get_htab_fd (in)
+Returns: file descriptor number (>= 0) on success, -1 on error
+
+This returns a file descriptor that can be used either to read out the
+entries in the guest's hashed page table (HPT), or to write entries to
+initialize the HPT. The returned fd can only be written to if the
+KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and
+can only be read if that bit is clear. The argument struct looks like
+this:
+
+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+ __u64 flags;
+ __u64 start_index;
+ __u64 reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1)
+#define KVM_GET_HTAB_WRITE ((__u64)0x2)
+
+The `start_index' field gives the index in the HPT of the entry at
+which to start reading. It is ignored when writing.
+
+Reads on the fd will initially supply information about all
+"interesting" HPT entries. Interesting entries are those with the
+bolted bit set, if the KVM_GET_HTAB_BOLTED_ONLY bit is set, otherwise
+all entries. When the end of the HPT is reached, the read() will
+return. If read() is called again on the fd, it will start again from
+the beginning of the HPT, but will only return HPT entries that have
+changed since they were last read.
+
+Data read or written is structured as a header (8 bytes) followed by a
+series of valid HPT entries (16 bytes) each. The header indicates how
+many valid HPT entries there are and how many invalid entries follow
+the valid entries. The invalid entries are not represented explicitly
+in the stream. The header format is:
+
+struct kvm_get_htab_header {
+ __u32 index;
+ __u16 n_valid;
+ __u16 n_invalid;
+};
+
+Writes to the fd create HPT entries starting at the index given in the
+header; first `n_valid' valid entries with contents from the data
+written, then `n_invalid' invalid entries, invalidating any previously
+valid entries found.
+
5. The kvm_run structure
------------------------
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index b322e5b..38bec1d 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -246,4 +246,26 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot,
return !(memslot->base_gfn & mask) && !(memslot->npages & mask);
}
+/*
+ * This works for 4k, 64k and 16M pages on POWER7,
+ * and 4k and 16M pages on PPC970.
+ */
+static inline unsigned long slb_pgsize_encoding(unsigned long psize)
+{
+ unsigned long senc = 0;
+
+ if (psize > 0x1000) {
+ senc = SLB_VSID_L;
+ if (psize == 0x10000)
+ senc |= SLB_VSID_LP_01;
+ }
+ return senc;
+}
+
+static inline int is_vrma_hpte(unsigned long hpte_v)
+{
+ return (hpte_v & ~0xffffffUL) ==
+ (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)));
+}
+
#endif /* __ASM_KVM_BOOK3S_64_H__ */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 609cca3..1ca31e9 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -164,6 +164,8 @@ extern void kvmppc_bookehv_exit(void);
extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu);
+extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *);
+
/*
* Cuts out inst bits with ordering according to spec.
* That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index b89ae4d..514883d 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -331,6 +331,31 @@ struct kvm_book3e_206_tlb_params {
__u32 reserved[8];
};
+/* For KVM_PPC_GET_HTAB_FD */
+struct kvm_get_htab_fd {
+ __u64 flags;
+ __u64 start_index;
+ __u64 reserved[2];
+};
+
+/* Values for kvm_get_htab_fd.flags */
+#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1)
+#define KVM_GET_HTAB_WRITE ((__u64)0x2)
+
+/*
+ * Data read on the file descriptor is formatted as a series of
+ * records, each consisting of a header followed by a series of
+ * `n_valid' HPTEs (16 bytes each), which are all valid. Following
+ * those valid HPTEs there are `n_invalid' invalid HPTEs, which
+ * are not represented explicitly in the stream. The same format
+ * is used for writing.
+ */
+struct kvm_get_htab_header {
+ __u32 index;
+ __u16 n_valid;
+ __u16 n_invalid;
+};
+
#define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1)
#define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2)
#define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 6ee6516..0aa4073 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -25,6 +25,8 @@
#include <linux/hugetlb.h>
#include <linux/vmalloc.h>
#include <linux/srcu.h>
+#include <linux/anon_inodes.h>
+#include <linux/file.h>
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
@@ -1145,6 +1147,348 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
put_page(page);
}
+/*
+ * Functions for reading and writing the hash table via reads and
+ * writes on a file descriptor.
+ *
+ * Reads return the guest view of the hash table, which has to be
+ * pieced together from the real hash table and the guest_rpte
+ * values in the revmap array.
+ *
+ * On writes, each HPTE written is considered in turn, and if it
+ * is valid, it is written to the HPT as if an H_ENTER with the
+ * exact flag set was done. When the invalid count is non-zero
+ * in the header written to the stream, the kernel will make
+ * sure that that many HPTEs are invalid, and invalidate them
+ * if not.
+ */
+
+struct kvm_htab_ctx {
+ unsigned long index;
+ unsigned long flags;
+ struct kvm *kvm;
+ int first_pass;
+};
+
+#define HPTE_SIZE (2 * sizeof(unsigned long))
+
+static long record_hpte(unsigned long flags, unsigned long *hptp,
+ unsigned long *hpte, struct revmap_entry *revp,
+ int want_valid, int first_pass)
+{
+ unsigned long v, r;
+ int ok = 1;
+ int valid, dirty;
+
+ /* Unmodified entries are uninteresting except on the first pass */
+ dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+ if (!first_pass && !dirty)
+ return 0;
+
+ valid = 0;
+ if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) {
+ valid = 1;
+ if ((flags & KVM_GET_HTAB_BOLTED_ONLY) &&
+ !(hptp[0] & HPTE_V_BOLTED))
+ valid = 0;
+ }
+ if (valid != want_valid)
+ return 0;
+
+ v = r = 0;
+ if (valid || dirty) {
+ /* lock the HPTE so it's stable and read it */
+ preempt_disable();
+ while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
+ cpu_relax();
+ v = hptp[0];
+ if (v & HPTE_V_ABSENT) {
+ v &= ~HPTE_V_ABSENT;
+ v |= HPTE_V_VALID;
+ }
+ /* re-evaluate valid and dirty from synchronized HPTE value */
+ valid = !!(v & HPTE_V_VALID);
+ if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED))
+ valid = 0;
+ r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C));
+ dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED);
+ /* only clear modified if this is the right sort of entry */
+ if (valid == want_valid && dirty) {
+ r &= ~HPTE_GR_MODIFIED;
+ revp->guest_rpte = r;
+ }
+ asm volatile(PPC_RELEASE_BARRIER "" : : : "memory");
+ hptp[0] &= ~HPTE_V_HVLOCK;
+ preempt_enable();
+ if (!(valid == want_valid && (first_pass || dirty)))
+ ok = 0;
+ }
+ hpte[0] = v;
+ hpte[1] = r;
+ return ok;
+}
+
+static ssize_t kvm_htab_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct kvm_htab_ctx *ctx = file->private_data;
+ struct kvm *kvm = ctx->kvm;
+ struct kvm_get_htab_header hdr;
+ unsigned long *hptp;
+ struct revmap_entry *revp;
+ unsigned long i, nb, nw;
+ unsigned long __user *lbuf;
+ struct kvm_get_htab_header __user *hptr;
+ unsigned long flags;
+ int first_pass;
+ unsigned long hpte[2];
+
+ if (!access_ok(VERIFY_WRITE, buf, count))
+ return -EFAULT;
+
+ first_pass = ctx->first_pass;
+ flags = ctx->flags;
+
+ i = ctx->index;
+ hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+ revp = kvm->arch.revmap + i;
+ lbuf = (unsigned long __user *)buf;
+
+ nb = 0;
+ while (nb + sizeof(hdr) + HPTE_SIZE < count) {
+ /* Initialize header */
+ hptr = (struct kvm_get_htab_header __user *)buf;
+ hdr.index = i;
+ hdr.n_valid = 0;
+ hdr.n_invalid = 0;
+ nw = nb;
+ nb += sizeof(hdr);
+ lbuf = (unsigned long __user *)(buf + sizeof(hdr));
+
+ /* Skip uninteresting entries, i.e. clean on not-first pass */
+ if (!first_pass) {
+ while (i < kvm->arch.hpt_npte &&
+ !(revp->guest_rpte & HPTE_GR_MODIFIED)) {
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+ }
+
+ /* Grab a series of valid entries */
+ while (i < kvm->arch.hpt_npte &&
+ hdr.n_valid < 0xffff &&
+ nb + HPTE_SIZE < count &&
+ record_hpte(flags, hptp, hpte, revp, 1, first_pass)) {
+ /* valid entry, write it out */
+ ++hdr.n_valid;
+ if (__put_user(hpte[0], lbuf) ||
+ __put_user(hpte[1], lbuf + 1))
+ return -EFAULT;
+ nb += HPTE_SIZE;
+ lbuf += 2;
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+ /* Now skip invalid entries while we can */
+ while (i < kvm->arch.hpt_npte &&
+ hdr.n_invalid < 0xffff &&
+ record_hpte(flags, hptp, hpte, revp, 0, first_pass)) {
+ /* found an invalid entry */
+ ++hdr.n_invalid;
+ ++i;
+ hptp += 2;
+ ++revp;
+ }
+
+ if (hdr.n_valid || hdr.n_invalid) {
+ /* write back the header */
+ if (__copy_to_user(hptr, &hdr, sizeof(hdr)))
+ return -EFAULT;
+ nw = nb;
+ buf = (char __user *)lbuf;
+ } else {
+ nb = nw;
+ }
+
+ /* Check if we've wrapped around the hash table */
+ if (i >= kvm->arch.hpt_npte) {
+ i = 0;
+ ctx->first_pass = 0;
+ break;
+ }
+ }
+
+ ctx->index = i;
+
+ return nb;
+}
+
+static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct kvm_htab_ctx *ctx = file->private_data;
+ struct kvm *kvm = ctx->kvm;
+ struct kvm_get_htab_header hdr;
+ unsigned long i, j;
+ unsigned long v, r;
+ unsigned long __user *lbuf;
+ unsigned long *hptp;
+ unsigned long tmp[2];
+ ssize_t nb;
+ long int err, ret;
+ int rma_setup;
+
+ if (!access_ok(VERIFY_READ, buf, count))
+ return -EFAULT;
+
+ /* lock out vcpus from running while we're doing this */
+ mutex_lock(&kvm->lock);
+ rma_setup = kvm->arch.rma_setup_done;
+ if (rma_setup) {
+ kvm->arch.rma_setup_done = 0; /* temporarily */
+ /* order rma_setup_done vs. vcpus_running */
+ smp_mb();
+ if (atomic_read(&kvm->arch.vcpus_running)) {
+ kvm->arch.rma_setup_done = 1;
+ mutex_unlock(&kvm->lock);
+ return -EBUSY;
+ }
+ }
+
+ err = 0;
+ for (nb = 0; nb + sizeof(hdr) <= count; ) {
+ err = -EFAULT;
+ if (__copy_from_user(&hdr, buf, sizeof(hdr)))
+ break;
+
+ err = 0;
+ if (nb + hdr.n_valid * HPTE_SIZE > count)
+ break;
+
+ nb += sizeof(hdr);
+ buf += sizeof(hdr);
+
+ err = -EINVAL;
+ i = hdr.index;
+ if (i >= kvm->arch.hpt_npte ||
+ i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte)
+ break;
+
+ hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
+ lbuf = (unsigned long __user *)buf;
+ for (j = 0; j < hdr.n_valid; ++j) {
+ err = -EFAULT;
+ if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
+ goto out;
+ err = -EINVAL;
+ if (!(v & HPTE_V_VALID))
+ goto out;
+ lbuf += 2;
+ nb += HPTE_SIZE;
+
+ if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
+ kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
+ err = -EIO;
+ ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
+ tmp);
+ if (ret != H_SUCCESS) {
+ pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
+ "r=%lx\n", ret, i, v, r);
+ goto out;
+ }
+ if (!rma_setup && is_vrma_hpte(v)) {
+ unsigned long psize = hpte_page_size(v, r);
+ unsigned long senc = slb_pgsize_encoding(psize);
+ unsigned long lpcr;
+
+ kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+ (VRMA_VSID << SLB_VSID_SHIFT_1T);
+ lpcr = kvm->arch.lpcr & ~LPCR_VRMASD;
+ lpcr |= senc << (LPCR_VRMASD_SH - 4);
+ kvm->arch.lpcr = lpcr;
+ rma_setup = 1;
+ }
+ ++i;
+ hptp += 2;
+ }
+
+ for (j = 0; j < hdr.n_invalid; ++j) {
+ if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT))
+ kvmppc_do_h_remove(kvm, 0, i, 0, tmp);
+ ++i;
+ hptp += 2;
+ }
+ err = 0;
+ }
+
+ out:
+ /* Order HPTE updates vs. rma_setup_done */
+ smp_wmb();
+ kvm->arch.rma_setup_done = rma_setup;
+ mutex_unlock(&kvm->lock);
+
+ if (err)
+ return err;
+ return nb;
+}
+
+static int kvm_htab_release(struct inode *inode, struct file *filp)
+{
+ struct kvm_htab_ctx *ctx = filp->private_data;
+
+ filp->private_data = NULL;
+ if (!(ctx->flags & KVM_GET_HTAB_WRITE))
+ atomic_dec(&ctx->kvm->arch.hpte_mod_interest);
+ kvm_put_kvm(ctx->kvm);
+ kfree(ctx);
+ return 0;
+}
+
+static struct file_operations kvm_htab_fops = {
+ .read = kvm_htab_read,
+ .write = kvm_htab_write,
+ .llseek = default_llseek,
+ .release = kvm_htab_release,
+};
+
+int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf)
+{
+ int ret;
+ struct kvm_htab_ctx *ctx;
+ int rwflag;
+
+ /* reject flags we don't recognize */
+ if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE))
+ return -EINVAL;
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+ kvm_get_kvm(kvm);
+ ctx->kvm = kvm;
+ ctx->index = ghf->start_index;
+ ctx->flags = ghf->flags;
+ ctx->first_pass = 1;
+
+ rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY;
+ ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag);
+ if (ret < 0) {
+ kvm_put_kvm(kvm);
+ return ret;
+ }
+
+ if (rwflag == O_RDONLY) {
+ mutex_lock(&kvm->slots_lock);
+ atomic_inc(&kvm->arch.hpte_mod_interest);
+ /* make sure kvmppc_do_h_enter etc. see the increment */
+ synchronize_srcu_expedited(&kvm->srcu);
+ mutex_unlock(&kvm->slots_lock);
+ }
+
+ return ret;
+}
+
void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
{
struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 843eb75..a4f59db 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1563,18 +1563,6 @@ out:
return r;
}
-static unsigned long slb_pgsize_encoding(unsigned long psize)
-{
- unsigned long senc = 0;
-
- if (psize > 0x1000) {
- senc = SLB_VSID_L;
- if (psize == 0x10000)
- senc |= SLB_VSID_LP_01;
- }
- return senc;
-}
-
static void unpin_slot(struct kvm_memory_slot *memslot)
{
unsigned long *physp;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index d583ea1..70739a0 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -354,6 +354,12 @@ int kvm_dev_ioctl_check_extension(long ext)
r = 1;
#else
r = 0;
+ break;
+#endif
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+ case KVM_CAP_PPC_HTAB_FD:
+ r = 1;
+ break;
#endif
break;
case KVM_CAP_NR_VCPUS:
@@ -954,6 +960,17 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = 0;
break;
}
+
+ case KVM_PPC_GET_HTAB_FD: {
+ struct kvm *kvm = filp->private_data;
+ struct kvm_get_htab_fd ghf;
+
+ r = -EFAULT;
+ if (copy_from_user(&ghf, argp, sizeof(ghf)))
+ break;
+ r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf);
+ break;
+ }
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 494a84c..e6e5d4b 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -634,6 +634,7 @@ struct kvm_ppc_smmu_info {
#endif
#define KVM_CAP_IRQFD_RESAMPLE 82
#define KVM_CAP_PPC_BOOKE_WATCHDOG 83
+#define KVM_CAP_PPC_HTAB_FD 84
#ifdef KVM_CAP_IRQ_ROUTING
@@ -859,6 +860,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce)
/* Available with KVM_CAP_RMA */
#define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma)
+/* Available with KVM_CAP_PPC_HTAB_FD */
+#define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd)
/*
* ioctls for vcpu fds
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 08/28] KVM: PPC: Book3S HV: Reset reverse-map chains when resetting the HPT
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (6 preceding siblings ...)
2012-12-06 0:40 ` [PATCH 07/28] KVM: PPC: Book3S HV: Provide a method for userspace to read and write the HPT Alexander Graf
@ 2012-12-06 0:40 ` Alexander Graf
2012-12-06 0:40 ` [PATCH 09/28] KVM: PPC: Book3S HV: Report correct HPT entry index when reading HPT Alexander Graf
` (20 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:40 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
With HV-style KVM, we maintain reverse-mapping lists that enable us to
find all the HPT (hashed page table) entries that reference each guest
physical page, with the heads of the lists in the memslot->arch.rmap
arrays. When we reset the HPT (i.e. when we reboot the VM), we clear
out all the HPT entries but we were not clearing out the reverse
mapping lists. The result is that as we create new HPT entries, the
lists get corrupted, which can easily lead to loops, resulting in the
host kernel hanging when it tries to traverse those lists.
This fixes the problem by zeroing out all the reverse mapping lists
when we zero out the HPT. This incidentally means that we are also
zeroing our record of the referenced and changed bits (not the bits
in the Linux PTEs, used by the Linux MM subsystem, but the bits used
by the KVM_GET_DIRTY_LOG ioctl, and those used by kvm_age_hva() and
kvm_test_age_hva()).
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/book3s_64_mmu_hv.c | 24 ++++++++++++++++++++++++
1 files changed, 24 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 0aa4073..1029e22 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -46,6 +46,7 @@
static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret);
+static void kvmppc_rmap_reset(struct kvm *kvm);
long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
@@ -144,6 +145,10 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
/* Set the entire HPT to 0, i.e. invalid HPTEs */
memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
/*
+ * Reset all the reverse-mapping chains for all memslots
+ */
+ kvmppc_rmap_reset(kvm);
+ /*
* Set the whole last_vcpu array to an invalid vcpu number.
* This ensures that each vcpu will flush its TLB on next entry.
*/
@@ -772,6 +777,25 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
goto out_put;
}
+static void kvmppc_rmap_reset(struct kvm *kvm)
+{
+ struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
+ int srcu_idx;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+ slots = kvm->memslots;
+ kvm_for_each_memslot(memslot, slots) {
+ /*
+ * This assumes it is acceptable to lose reference and
+ * change bits across a reset.
+ */
+ memset(memslot->arch.rmap, 0,
+ memslot->npages * sizeof(*memslot->arch.rmap));
+ }
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
static int kvm_handle_hva_range(struct kvm *kvm,
unsigned long start,
unsigned long end,
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 09/28] KVM: PPC: Book3S HV: Report correct HPT entry index when reading HPT
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (7 preceding siblings ...)
2012-12-06 0:40 ` [PATCH 08/28] KVM: PPC: Book3S HV: Reset reverse-map chains when resetting " Alexander Graf
@ 2012-12-06 0:40 ` Alexander Graf
2012-12-06 0:40 ` [PATCH 10/28] KVM: PPC: Book3S HV: Don't give the guest RW access to RO pages Alexander Graf
` (19 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:40 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
This fixes a bug in the code which allows userspace to read out the
contents of the guest's hashed page table (HPT). On the second and
subsequent passes through the HPT, when we are reporting only those
entries that have changed, we were incorrectly initializing the index
field of the header with the index of the first entry we skipped
rather than the first changed entry. This fixes it.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 1029e22..ac6b5ac 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1282,7 +1282,6 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
while (nb + sizeof(hdr) + HPTE_SIZE < count) {
/* Initialize header */
hptr = (struct kvm_get_htab_header __user *)buf;
- hdr.index = i;
hdr.n_valid = 0;
hdr.n_invalid = 0;
nw = nb;
@@ -1298,6 +1297,7 @@ static ssize_t kvm_htab_read(struct file *file, char __user *buf,
++revp;
}
}
+ hdr.index = i;
/* Grab a series of valid entries */
while (i < kvm->arch.hpt_npte &&
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 10/28] KVM: PPC: Book3S HV: Don't give the guest RW access to RO pages
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (8 preceding siblings ...)
2012-12-06 0:40 ` [PATCH 09/28] KVM: PPC: Book3S HV: Report correct HPT entry index when reading HPT Alexander Graf
@ 2012-12-06 0:40 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 11/28] KVM: PPC: Book3S PR: Emulate PURR, SPURR and DSCR registers Alexander Graf
` (18 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:40 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
Currently, if the guest does an H_PROTECT hcall requesting that the
permissions on a HPT entry be changed to allow writing, we make the
requested change even if the page is marked read-only in the host
Linux page tables. This is a problem since it would for instance
allow a guest to modify a page that KSM has decided can be shared
between multiple guests.
To fix this, if the new permissions for the page allow writing, we need
to look up the memslot for the page, work out the host virtual address,
and look up the Linux page tables to get the PTE for the page. If that
PTE is read-only, we reduce the HPTE permissions to read-only.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 22 ++++++++++++++++++++++
1 files changed, 22 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index fc3da32..7a57ea4 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -600,6 +600,28 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
asm volatile("tlbiel %0" : : "r" (rb));
asm volatile("ptesync" : : : "memory");
}
+ /*
+ * If the host has this page as readonly but the guest
+ * wants to make it read/write, reduce the permissions.
+ * Checking the host permissions involves finding the
+ * memslot and then the Linux PTE for the page.
+ */
+ if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
+ unsigned long psize, gfn, hva;
+ struct kvm_memory_slot *memslot;
+ pgd_t *pgdir = vcpu->arch.pgdir;
+ pte_t pte;
+
+ psize = hpte_page_size(v, r);
+ gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
+ memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn);
+ if (memslot) {
+ hva = __gfn_to_hva_memslot(memslot, gfn);
+ pte = lookup_linux_pte(pgdir, hva, 1, &psize);
+ if (pte_present(pte) && !pte_write(pte))
+ r = hpte_make_readonly(r);
+ }
+ }
}
hpte[1] = r;
eieio();
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 11/28] KVM: PPC: Book3S PR: Emulate PURR, SPURR and DSCR registers
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (9 preceding siblings ...)
2012-12-06 0:40 ` [PATCH 10/28] KVM: PPC: Book3S HV: Don't give the guest RW access to RO pages Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 12/28] KVM: PPC: Book3S PR: Fix VSX handling Alexander Graf
` (17 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
This adds basic emulation of the PURR and SPURR registers. We assume
we are emulating a single-threaded core, so these advance at the same
rate as the timebase. A Linux kernel running on a POWER7 expects to
be able to access these registers and is not prepared to handle a
program interrupt on accessing them.
This also adds a very minimal emulation of the DSCR (data stream
control register). Writes are ignored and reads return zero.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/kvm_book3s.h | 2 ++
arch/powerpc/kvm/book3s_emulate.c | 16 +++++++++++++++-
2 files changed, 17 insertions(+), 1 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 46763d1..5a56e1c 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -81,6 +81,8 @@ struct kvmppc_vcpu_book3s {
u64 sdr1;
u64 hior;
u64 msr_mask;
+ u64 purr_offset;
+ u64 spurr_offset;
#ifdef CONFIG_PPC_BOOK3S_32
u32 vsid_pool[VSID_POOL_SIZE];
u32 vsid_next;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index b9a989d..d31a716 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -22,6 +22,7 @@
#include <asm/kvm_book3s.h>
#include <asm/reg.h>
#include <asm/switch_to.h>
+#include <asm/time.h>
#define OP_19_XOP_RFID 18
#define OP_19_XOP_RFI 50
@@ -395,6 +396,12 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
(mfmsr() & MSR_HV))
vcpu->arch.hflags |= BOOK3S_HFLAG_DCBZ32;
break;
+ case SPRN_PURR:
+ to_book3s(vcpu)->purr_offset = spr_val - get_tb();
+ break;
+ case SPRN_SPURR:
+ to_book3s(vcpu)->spurr_offset = spr_val - get_tb();
+ break;
case SPRN_GQR0:
case SPRN_GQR1:
case SPRN_GQR2:
@@ -412,6 +419,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_CTRLF:
case SPRN_CTRLT:
case SPRN_L2CR:
+ case SPRN_DSCR:
case SPRN_MMCR0_GEKKO:
case SPRN_MMCR1_GEKKO:
case SPRN_PMC1_GEKKO:
@@ -483,9 +491,15 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
*spr_val = to_book3s(vcpu)->hid[5];
break;
case SPRN_CFAR:
- case SPRN_PURR:
+ case SPRN_DSCR:
*spr_val = 0;
break;
+ case SPRN_PURR:
+ *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
+ break;
+ case SPRN_SPURR:
+ *spr_val = get_tb() + to_book3s(vcpu)->purr_offset;
+ break;
case SPRN_GQR0:
case SPRN_GQR1:
case SPRN_GQR2:
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 12/28] KVM: PPC: Book3S PR: Fix VSX handling
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (10 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 11/28] KVM: PPC: Book3S PR: Emulate PURR, SPURR and DSCR registers Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 13/28] KVM: PPC: Book3S PR: MSR_DE doesn't exist on Book 3S Alexander Graf
` (16 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
This fixes various issues in how we were handling the VSX registers
that exist on POWER7 machines. First, we were running off the end
of the current->thread.fpr[] array. Ultimately this was because the
vcpu->arch.vsr[] array is sized to be able to store both the FP
registers and the extra VSX registers (i.e. 64 entries), but PR KVM
only uses it for the extra VSX registers (i.e. 32 entries).
Secondly, calling load_up_vsx() from C code is a really bad idea,
because it jumps to fast_exception_return at the end, rather than
returning with a blr instruction. This was causing it to jump off
to a random location with random register contents, since it was using
the largely uninitialized stack frame created by kvmppc_load_up_vsx.
In fact, it isn't necessary to call either __giveup_vsx or load_up_vsx,
since giveup_fpu and load_up_fpu handle the extra VSX registers as well
as the standard FP registers on machines with VSX. Also, since VSX
instructions can access the VMX registers and the FP registers as well
as the extra VSX registers, we have to load up the FP and VMX registers
before we can turn on the MSR_VSX bit for the guest. Conversely, if
we save away any of the VSX or FP registers, we have to turn off MSR_VSX
for the guest.
To handle all this, it is more convenient for a single call to
kvmppc_giveup_ext() to handle all the state saving that needs to be done,
so we make it take a set of MSR bits rather than just one, and the switch
statement becomes a series of if statements. Similarly kvmppc_handle_ext
needs to be able to load up more than one set of registers.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/reg.h | 1 +
arch/powerpc/kvm/book3s_exports.c | 3 -
arch/powerpc/kvm/book3s_pr.c | 112 ++++++++++++++++++---------------
arch/powerpc/kvm/book3s_rmhandlers.S | 3 -
4 files changed, 62 insertions(+), 57 deletions(-)
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index d24c141..97d3727 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -518,6 +518,7 @@
#define SRR1_WS_DEEPER 0x00020000 /* Some resources not maintained */
#define SRR1_WS_DEEP 0x00010000 /* All resources maintained */
#define SRR1_PROGFPE 0x00100000 /* Floating Point Enabled */
+#define SRR1_PROGILL 0x00080000 /* Illegal instruction */
#define SRR1_PROGPRIV 0x00040000 /* Privileged instruction */
#define SRR1_PROGTRAP 0x00020000 /* Trap */
#define SRR1_PROGADDR 0x00010000 /* SRR0 contains subsequent addr */
diff --git a/arch/powerpc/kvm/book3s_exports.c b/arch/powerpc/kvm/book3s_exports.c
index a150817..7057a02 100644
--- a/arch/powerpc/kvm/book3s_exports.c
+++ b/arch/powerpc/kvm/book3s_exports.c
@@ -28,8 +28,5 @@ EXPORT_SYMBOL_GPL(kvmppc_load_up_fpu);
#ifdef CONFIG_ALTIVEC
EXPORT_SYMBOL_GPL(kvmppc_load_up_altivec);
#endif
-#ifdef CONFIG_VSX
-EXPORT_SYMBOL_GPL(kvmppc_load_up_vsx);
-#endif
#endif
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index b853696..5c496ec 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -81,9 +81,7 @@ void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
svcpu_put(svcpu);
#endif
- kvmppc_giveup_ext(vcpu, MSR_FP);
- kvmppc_giveup_ext(vcpu, MSR_VEC);
- kvmppc_giveup_ext(vcpu, MSR_VSX);
+ kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
vcpu->cpu = -1;
}
@@ -433,10 +431,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
static inline int get_fpr_index(int i)
{
-#ifdef CONFIG_VSX
- i *= 2;
-#endif
- return i;
+ return i * TS_FPRWIDTH;
}
/* Give up external provider (FPU, Altivec, VSX) */
@@ -450,41 +445,49 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
u64 *thread_fpr = (u64*)t->fpr;
int i;
- if (!(vcpu->arch.guest_owned_ext & msr))
+ /*
+ * VSX instructions can access FP and vector registers, so if
+ * we are giving up VSX, make sure we give up FP and VMX as well.
+ */
+ if (msr & MSR_VSX)
+ msr |= MSR_FP | MSR_VEC;
+
+ msr &= vcpu->arch.guest_owned_ext;
+ if (!msr)
return;
#ifdef DEBUG_EXT
printk(KERN_INFO "Giving up ext 0x%lx\n", msr);
#endif
- switch (msr) {
- case MSR_FP:
+ if (msr & MSR_FP) {
+ /*
+ * Note that on CPUs with VSX, giveup_fpu stores
+ * both the traditional FP registers and the added VSX
+ * registers into thread.fpr[].
+ */
giveup_fpu(current);
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
vcpu_fpr[i] = thread_fpr[get_fpr_index(i)];
vcpu->arch.fpscr = t->fpscr.val;
- break;
- case MSR_VEC:
+
+#ifdef CONFIG_VSX
+ if (cpu_has_feature(CPU_FTR_VSX))
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
+ vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
+#endif
+ }
+
#ifdef CONFIG_ALTIVEC
+ if (msr & MSR_VEC) {
giveup_altivec(current);
memcpy(vcpu->arch.vr, t->vr, sizeof(vcpu->arch.vr));
vcpu->arch.vscr = t->vscr;
-#endif
- break;
- case MSR_VSX:
-#ifdef CONFIG_VSX
- __giveup_vsx(current);
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
- vcpu_vsx[i] = thread_fpr[get_fpr_index(i) + 1];
-#endif
- break;
- default:
- BUG();
}
+#endif
- vcpu->arch.guest_owned_ext &= ~msr;
- current->thread.regs->msr &= ~msr;
+ vcpu->arch.guest_owned_ext &= ~(msr | MSR_VSX);
kvmppc_recalc_shadow_msr(vcpu);
}
@@ -544,47 +547,56 @@ static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
return RESUME_GUEST;
}
- /* We already own the ext */
- if (vcpu->arch.guest_owned_ext & msr) {
- return RESUME_GUEST;
+ if (msr == MSR_VSX) {
+ /* No VSX? Give an illegal instruction interrupt */
+#ifdef CONFIG_VSX
+ if (!cpu_has_feature(CPU_FTR_VSX))
+#endif
+ {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ return RESUME_GUEST;
+ }
+
+ /*
+ * We have to load up all the FP and VMX registers before
+ * we can let the guest use VSX instructions.
+ */
+ msr = MSR_FP | MSR_VEC | MSR_VSX;
}
+ /* See if we already own all the ext(s) needed */
+ msr &= ~vcpu->arch.guest_owned_ext;
+ if (!msr)
+ return RESUME_GUEST;
+
#ifdef DEBUG_EXT
printk(KERN_INFO "Loading up ext 0x%lx\n", msr);
#endif
current->thread.regs->msr |= msr;
- switch (msr) {
- case MSR_FP:
+ if (msr & MSR_FP) {
for (i = 0; i < ARRAY_SIZE(vcpu->arch.fpr); i++)
thread_fpr[get_fpr_index(i)] = vcpu_fpr[i];
-
+#ifdef CONFIG_VSX
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr) / 2; i++)
+ thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
+#endif
t->fpscr.val = vcpu->arch.fpscr;
t->fpexc_mode = 0;
kvmppc_load_up_fpu();
- break;
- case MSR_VEC:
+ }
+
+ if (msr & MSR_VEC) {
#ifdef CONFIG_ALTIVEC
memcpy(t->vr, vcpu->arch.vr, sizeof(vcpu->arch.vr));
t->vscr = vcpu->arch.vscr;
t->vrsave = -1;
kvmppc_load_up_altivec();
#endif
- break;
- case MSR_VSX:
-#ifdef CONFIG_VSX
- for (i = 0; i < ARRAY_SIZE(vcpu->arch.vsr); i++)
- thread_fpr[get_fpr_index(i) + 1] = vcpu_vsx[i];
- kvmppc_load_up_vsx();
-#endif
- break;
- default:
- BUG();
}
vcpu->arch.guest_owned_ext |= msr;
-
kvmppc_recalc_shadow_msr(vcpu);
return RESUME_GUEST;
@@ -1134,7 +1146,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/* Save VSX state in stack */
used_vsr = current->thread.used_vsr;
if (used_vsr && (current->thread.regs->msr & MSR_VSX))
- __giveup_vsx(current);
+ __giveup_vsx(current);
#endif
/* Remember the MSR with disabled extensions */
@@ -1151,14 +1163,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
/* No need for kvm_guest_exit. It's done in handle_exit.
We also get here with interrupts enabled. */
- current->thread.regs->msr = ext_msr;
-
/* Make sure we save the guest FPU/Altivec/VSX state */
- kvmppc_giveup_ext(vcpu, MSR_FP);
- kvmppc_giveup_ext(vcpu, MSR_VEC);
- kvmppc_giveup_ext(vcpu, MSR_VSX);
+ kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
+
+ current->thread.regs->msr = ext_msr;
- /* Restore FPU state from stack */
+ /* Restore FPU/VSX state from stack */
memcpy(current->thread.fpr, fpr, sizeof(current->thread.fpr));
current->thread.fpscr.val = fpscr;
current->thread.fpexc_mode = fpexc_mode;
diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S
index b2f8258..8f7633e 100644
--- a/arch/powerpc/kvm/book3s_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_rmhandlers.S
@@ -234,8 +234,5 @@ define_load_up(fpu)
#ifdef CONFIG_ALTIVEC
define_load_up(altivec)
#endif
-#ifdef CONFIG_VSX
-define_load_up(vsx)
-#endif
#include "book3s_segment.S"
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 13/28] KVM: PPC: Book3S PR: MSR_DE doesn't exist on Book 3S
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (11 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 12/28] KVM: PPC: Book3S PR: Fix VSX handling Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 14/28] MAINTAINERS: Add git tree link for PPC KVM Alexander Graf
` (15 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
The mask of MSR bits that get transferred from the guest MSR to the
shadow MSR included MSR_DE. In fact that bit only exists on Book 3E
processors, and it is assigned the same bit used for MSR_BE on Book 3S
processors. Since we already had MSR_BE in the mask, this just removes
MSR_DE.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/book3s_pr.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 5c496ec..28d38ad 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -145,7 +145,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
ulong smsr = vcpu->arch.shared->msr;
/* Guest MSR values */
- smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_DE;
+ smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE;
/* Process MSR values */
smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
/* External providers the guest reserved */
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 14/28] MAINTAINERS: Add git tree link for PPC KVM
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (12 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 13/28] KVM: PPC: Book3S PR: MSR_DE doesn't exist on Book 3S Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 15/28] KVM: PPC: Book3S HV: Improve handling of local vs. global TLB invalidations Alexander Graf
` (14 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Michael Ellerman
From: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
MAINTAINERS | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 4376c52..7034467 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4253,6 +4253,7 @@ KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
M: Alexander Graf <agraf@suse.de>
L: kvm-ppc@vger.kernel.org
W: http://kvm.qumranet.com
+T: git git://github.com/agraf/linux-2.6.git
S: Supported
F: arch/powerpc/include/asm/kvm*
F: arch/powerpc/kvm/
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 15/28] KVM: PPC: Book3S HV: Improve handling of local vs. global TLB invalidations
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (13 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 14/28] MAINTAINERS: Add git tree link for PPC KVM Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 16/28] KVM: PPC: Book3S HV: Handle guest-caused machine checks on POWER7 without panicking Alexander Graf
` (13 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
When we change or remove a HPT (hashed page table) entry, we can do
either a global TLB invalidation (tlbie) that works across the whole
machine, or a local invalidation (tlbiel) that only affects this core.
Currently we do local invalidations if the VM has only one vcpu or if
the guest requests it with the H_LOCAL flag, though the guest Linux
kernel currently doesn't ever use H_LOCAL. Then, to cope with the
possibility that vcpus moving around to different physical cores might
expose stale TLB entries, there is some code in kvmppc_hv_entry to
flush the whole TLB of entries for this VM if either this vcpu is now
running on a different physical core from where it last ran, or if this
physical core last ran a different vcpu.
There are a number of problems on POWER7 with this as it stands:
- The TLB invalidation is done per thread, whereas it only needs to be
done per core, since the TLB is shared between the threads.
- With the possibility of the host paging out guest pages, the use of
H_LOCAL by an SMP guest is dangerous since the guest could possibly
retain and use a stale TLB entry pointing to a page that had been
removed from the guest.
- The TLB invalidations that we do when a vcpu moves from one physical
core to another are unnecessary in the case of an SMP guest that isn't
using H_LOCAL.
- The optimization of using local invalidations rather than global should
apply to guests with one virtual core, not just one vcpu.
(None of this applies on PPC970, since there we always have to
invalidate the whole TLB when entering and leaving the guest, and we
can't support paging out guest memory.)
To fix these problems and simplify the code, we now maintain a simple
cpumask of which cpus need to flush the TLB on entry to the guest.
(This is indexed by cpu, though we only ever use the bits for thread
0 of each core.) Whenever we do a local TLB invalidation, we set the
bits for every cpu except the bit for thread 0 of the core that we're
currently running on. Whenever we enter a guest, we test and clear the
bit for our core, and flush the TLB if it was set.
On initial startup of the VM, and when resetting the HPT, we set all the
bits in the need_tlb_flush cpumask, since any core could potentially have
stale TLB entries from the previous VM to use the same LPID, or the
previous contents of the HPT.
Then, we maintain a count of the number of online virtual cores, and use
that when deciding whether to use a local invalidation rather than the
number of online vcpus. The code to make that decision is extracted out
into a new function, global_invalidates(). For multi-core guests on
POWER7 (i.e. when we are using mmu notifiers), we now never do local
invalidations regardless of the H_LOCAL flag.
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/kvm_host.h | 5 ++-
arch/powerpc/kernel/asm-offsets.c | 4 +--
arch/powerpc/kvm/book3s_64_mmu_hv.c | 7 +---
arch/powerpc/kvm/book3s_hv.c | 9 ++++-
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 37 ++++++++++++++++++--
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 56 ++++++++++++++----------------
6 files changed, 73 insertions(+), 45 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 58c7264..62fbd38 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -246,11 +246,12 @@ struct kvm_arch {
int using_mmu_notifiers;
u32 hpt_order;
atomic_t vcpus_running;
+ u32 online_vcores;
unsigned long hpt_npte;
unsigned long hpt_mask;
atomic_t hpte_mod_interest;
spinlock_t slot_phys_lock;
- unsigned short last_vcpu[NR_CPUS];
+ cpumask_t need_tlb_flush;
struct kvmppc_vcore *vcores[KVM_MAX_VCORES];
struct kvmppc_linear_info *hpt_li;
#endif /* CONFIG_KVM_BOOK3S_64_HV */
@@ -275,6 +276,7 @@ struct kvmppc_vcore {
int nap_count;
int napping_threads;
u16 pcpu;
+ u16 last_cpu;
u8 vcore_state;
u8 in_guest;
struct list_head runnable_threads;
@@ -523,7 +525,6 @@ struct kvm_vcpu_arch {
u64 dec_jiffies;
u64 dec_expires;
unsigned long pending_exceptions;
- u16 last_cpu;
u8 ceded;
u8 prodded;
u32 last_inst;
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 7523539..4e23ba2 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -441,8 +441,7 @@ int main(void)
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
- DEFINE(KVM_ONLINE_CPUS, offsetof(struct kvm, online_vcpus.counter));
- DEFINE(KVM_LAST_VCPU, offsetof(struct kvm, arch.last_vcpu));
+ DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
DEFINE(KVM_RMOR, offsetof(struct kvm, arch.rmor));
DEFINE(KVM_VRMA_SLB_V, offsetof(struct kvm, arch.vrma_slb_v));
@@ -470,7 +469,6 @@ int main(void)
DEFINE(VCPU_SLB, offsetof(struct kvm_vcpu, arch.slb));
DEFINE(VCPU_SLB_MAX, offsetof(struct kvm_vcpu, arch.slb_max));
DEFINE(VCPU_SLB_NR, offsetof(struct kvm_vcpu, arch.slb_nr));
- DEFINE(VCPU_LAST_CPU, offsetof(struct kvm_vcpu, arch.last_cpu));
DEFINE(VCPU_FAULT_DSISR, offsetof(struct kvm_vcpu, arch.fault_dsisr));
DEFINE(VCPU_FAULT_DAR, offsetof(struct kvm_vcpu, arch.fault_dar));
DEFINE(VCPU_LAST_INST, offsetof(struct kvm_vcpu, arch.last_inst));
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ac6b5ac..8cc18ab 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -148,11 +148,8 @@ long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
* Reset all the reverse-mapping chains for all memslots
*/
kvmppc_rmap_reset(kvm);
- /*
- * Set the whole last_vcpu array to an invalid vcpu number.
- * This ensures that each vcpu will flush its TLB on next entry.
- */
- memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu));
+ /* Ensure that each vcpu will flush its TLB on next entry. */
+ cpumask_setall(&kvm->arch.need_tlb_flush);
*htab_orderp = order;
err = 0;
} else {
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index a4f59db..ddbec60 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -853,7 +853,6 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
goto free_vcpu;
vcpu->arch.shared = &vcpu->arch.shregs;
- vcpu->arch.last_cpu = -1;
vcpu->arch.mmcr[0] = MMCR0_FC;
vcpu->arch.ctrl = CTRL_RUNLATCH;
/* default to host PVR, since we can't spoof it */
@@ -880,6 +879,7 @@ struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
vcore->preempt_tb = TB_NIL;
}
kvm->arch.vcores[core] = vcore;
+ kvm->arch.online_vcores++;
}
mutex_unlock(&kvm->lock);
@@ -1802,6 +1802,13 @@ int kvmppc_core_init_vm(struct kvm *kvm)
return -ENOMEM;
kvm->arch.lpid = lpid;
+ /*
+ * Since we don't flush the TLB when tearing down a VM,
+ * and this lpid might have previously been used,
+ * make sure we flush on each core before running the new VM.
+ */
+ cpumask_setall(&kvm->arch.need_tlb_flush);
+
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
kvm->arch.rma = NULL;
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7a57ea4..19c93ba 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -35,6 +35,37 @@ static void *real_vmalloc_addr(void *x)
return __va(addr);
}
+/* Return 1 if we need to do a global tlbie, 0 if we can use tlbiel */
+static int global_invalidates(struct kvm *kvm, unsigned long flags)
+{
+ int global;
+
+ /*
+ * If there is only one vcore, and it's currently running,
+ * we can use tlbiel as long as we mark all other physical
+ * cores as potentially having stale TLB entries for this lpid.
+ * If we're not using MMU notifiers, we never take pages away
+ * from the guest, so we can use tlbiel if requested.
+ * Otherwise, don't use tlbiel.
+ */
+ if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcore)
+ global = 0;
+ else if (kvm->arch.using_mmu_notifiers)
+ global = 1;
+ else
+ global = !(flags & H_LOCAL);
+
+ if (!global) {
+ /* any other core might now have stale TLB entries... */
+ smp_wmb();
+ cpumask_setall(&kvm->arch.need_tlb_flush);
+ cpumask_clear_cpu(local_paca->kvm_hstate.kvm_vcore->pcpu,
+ &kvm->arch.need_tlb_flush);
+ }
+
+ return global;
+}
+
/*
* Add this HPTE into the chain for the real page.
* Must be called with the chain locked; it unlocks the chain.
@@ -390,7 +421,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
if (v & HPTE_V_VALID) {
hpte[0] &= ~HPTE_V_VALID;
rb = compute_tlbie_rb(v, hpte[1], pte_index);
- if (!(flags & H_LOCAL) && atomic_read(&kvm->online_vcpus) > 1) {
+ if (global_invalidates(kvm, flags)) {
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
@@ -565,8 +596,6 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
return H_NOT_FOUND;
}
- if (atomic_read(&kvm->online_vcpus) == 1)
- flags |= H_LOCAL;
v = hpte[0];
bits = (flags << 55) & HPTE_R_PP0;
bits |= (flags << 48) & HPTE_R_KEY_HI;
@@ -587,7 +616,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
if (v & HPTE_V_VALID) {
rb = compute_tlbie_rb(v, r, pte_index);
hpte[0] = v & ~HPTE_V_VALID;
- if (!(flags & H_LOCAL)) {
+ if (global_invalidates(kvm, flags)) {
while(!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
asm volatile("ptesync" : : : "memory");
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 690d112..b48bd53 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -313,7 +313,33 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
mtspr SPRN_SDR1,r6 /* switch to partition page table */
mtspr SPRN_LPID,r7
isync
+
+ /* See if we need to flush the TLB */
+ lhz r6,PACAPACAINDEX(r13) /* test_bit(cpu, need_tlb_flush) */
+ clrldi r7,r6,64-6 /* extract bit number (6 bits) */
+ srdi r6,r6,6 /* doubleword number */
+ sldi r6,r6,3 /* address offset */
+ add r6,r6,r9
+ addi r6,r6,KVM_NEED_FLUSH /* dword in kvm->arch.need_tlb_flush */
li r0,1
+ sld r0,r0,r7
+ ld r7,0(r6)
+ and. r7,r7,r0
+ beq 22f
+23: ldarx r7,0,r6 /* if set, clear the bit */
+ andc r7,r7,r0
+ stdcx. r7,0,r6
+ bne 23b
+ li r6,128 /* and flush the TLB */
+ mtctr r6
+ li r7,0x800 /* IS field = 0b10 */
+ ptesync
+28: tlbiel r7
+ addi r7,r7,0x1000
+ bdnz 28b
+ ptesync
+
+22: li r0,1
stb r0,VCORE_IN_GUEST(r5) /* signal secondaries to continue */
b 10f
@@ -336,36 +362,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
mr r9,r4
blt hdec_soon
- /*
- * Invalidate the TLB if we could possibly have stale TLB
- * entries for this partition on this core due to the use
- * of tlbiel.
- * XXX maybe only need this on primary thread?
- */
- ld r9,VCPU_KVM(r4) /* pointer to struct kvm */
- lwz r5,VCPU_VCPUID(r4)
- lhz r6,PACAPACAINDEX(r13)
- rldimi r6,r5,0,62 /* XXX map as if threads 1:1 p:v */
- lhz r8,VCPU_LAST_CPU(r4)
- sldi r7,r6,1 /* see if this is the same vcpu */
- add r7,r7,r9 /* as last ran on this pcpu */
- lhz r0,KVM_LAST_VCPU(r7)
- cmpw r6,r8 /* on the same cpu core as last time? */
- bne 3f
- cmpw r0,r5 /* same vcpu as this core last ran? */
- beq 1f
-3: sth r6,VCPU_LAST_CPU(r4) /* if not, invalidate partition TLB */
- sth r5,KVM_LAST_VCPU(r7)
- li r6,128
- mtctr r6
- li r7,0x800 /* IS field = 0b10 */
- ptesync
-2: tlbiel r7
- addi r7,r7,0x1000
- bdnz 2b
- ptesync
-1:
-
/* Save purr/spurr */
mfspr r5,SPRN_PURR
mfspr r6,SPRN_SPURR
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 16/28] KVM: PPC: Book3S HV: Handle guest-caused machine checks on POWER7 without panicking
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (14 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 15/28] KVM: PPC: Book3S HV: Improve handling of local vs. global TLB invalidations Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 17/28] KVM: PPC: e500: Silence bogus GCC warning in tlb code Alexander Graf
` (12 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Paul Mackerras
From: Paul Mackerras <paulus@samba.org>
Currently, if a machine check interrupt happens while we are in the
guest, we exit the guest and call the host's machine check handler,
which tends to cause the host to panic. Some machine checks can be
triggered by the guest; for example, if the guest creates two entries
in the SLB that map the same effective address, and then accesses that
effective address, the CPU will take a machine check interrupt.
To handle this better, when a machine check happens inside the guest,
we call a new function, kvmppc_realmode_machine_check(), while still in
real mode before exiting the guest. On POWER7, it handles the cases
that the guest can trigger, either by flushing and reloading the SLB,
or by flushing the TLB, and then it delivers the machine check interrupt
directly to the guest without going back to the host. On POWER7, the
OPAL firmware patches the machine check interrupt vector so that it
gets control first, and it leaves behind its analysis of the situation
in a structure pointed to by the opal_mc_evt field of the paca. The
kvmppc_realmode_machine_check() function looks at this, and if OPAL
reports that there was no error, or that it has handled the error, we
also go straight back to the guest with a machine check. We have to
deliver a machine check to the guest since the machine check interrupt
might have trashed valid values in SRR0/1.
If the machine check is one we can't handle in real mode, and one that
OPAL hasn't already handled, or on PPC970, we exit the guest and call
the host's machine check handler. We do this by jumping to the
machine_check_fwnmi label, rather than absolute address 0x200, because
we don't want to re-execute OPAL's handler on POWER7. On PPC970, the
two are equivalent because address 0x200 just contains a branch.
Then, if the host machine check handler decides that the system can
continue executing, kvmppc_handle_exit() delivers a machine check
interrupt to the guest -- once again to let the guest know that SRR0/1
have been modified.
Signed-off-by: Paul Mackerras <paulus@samba.org>
[agraf: fix checkpatch warnings]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/mmu-hash64.h | 10 ++
arch/powerpc/kvm/Makefile | 1 +
arch/powerpc/kvm/book3s_hv.c | 11 +++
arch/powerpc/kvm/book3s_hv_ras.c | 144 +++++++++++++++++++++++++++++++
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 75 ++++++++++------
5 files changed, 213 insertions(+), 28 deletions(-)
create mode 100644 arch/powerpc/kvm/book3s_hv_ras.c
diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h
index 9673f73..2fdb47a 100644
--- a/arch/powerpc/include/asm/mmu-hash64.h
+++ b/arch/powerpc/include/asm/mmu-hash64.h
@@ -121,6 +121,16 @@ extern char initial_stab[];
#define PP_RXRX 3 /* Supervisor read, User read */
#define PP_RXXX (HPTE_R_PP0 | 2) /* Supervisor read, user none */
+/* Fields for tlbiel instruction in architecture 2.06 */
+#define TLBIEL_INVAL_SEL_MASK 0xc00 /* invalidation selector */
+#define TLBIEL_INVAL_PAGE 0x000 /* invalidate a single page */
+#define TLBIEL_INVAL_SET_LPID 0x800 /* invalidate a set for current LPID */
+#define TLBIEL_INVAL_SET 0xc00 /* invalidate a set for all LPIDs */
+#define TLBIEL_INVAL_SET_MASK 0xfff000 /* set number to inval. */
+#define TLBIEL_INVAL_SET_SHIFT 12
+
+#define POWER7_TLB_SETS 128 /* # sets in POWER7 TLB */
+
#ifndef __ASSEMBLY__
struct hash_pte {
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index cd89658..1e473d4 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -73,6 +73,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
book3s_hv_rmhandlers.o \
book3s_hv_rm_mmu.o \
book3s_64_vio_hv.o \
+ book3s_hv_ras.o \
book3s_hv_builtin.o
kvm-book3s_64-module-objs := \
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ddbec60..71d0c90 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -545,6 +545,17 @@ static int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
case BOOK3S_INTERRUPT_PERFMON:
r = RESUME_GUEST;
break;
+ case BOOK3S_INTERRUPT_MACHINE_CHECK:
+ /*
+ * Deliver a machine check interrupt to the guest.
+ * We have to do this, even if the host has handled the
+ * machine check, because machine checks use SRR0/1 and
+ * the interrupt might have trashed guest state in them.
+ */
+ kvmppc_book3s_queue_irqprio(vcpu,
+ BOOK3S_INTERRUPT_MACHINE_CHECK);
+ r = RESUME_GUEST;
+ break;
case BOOK3S_INTERRUPT_PROGRAM:
{
ulong flags;
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
new file mode 100644
index 0000000..35f3cf0
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -0,0 +1,144 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright 2012 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ */
+
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/kernel.h>
+#include <asm/opal.h>
+
+/* SRR1 bits for machine check on POWER7 */
+#define SRR1_MC_LDSTERR (1ul << (63-42))
+#define SRR1_MC_IFETCH_SH (63-45)
+#define SRR1_MC_IFETCH_MASK 0x7
+#define SRR1_MC_IFETCH_SLBPAR 2 /* SLB parity error */
+#define SRR1_MC_IFETCH_SLBMULTI 3 /* SLB multi-hit */
+#define SRR1_MC_IFETCH_SLBPARMULTI 4 /* SLB parity + multi-hit */
+#define SRR1_MC_IFETCH_TLBMULTI 5 /* I-TLB multi-hit */
+
+/* DSISR bits for machine check on POWER7 */
+#define DSISR_MC_DERAT_MULTI 0x800 /* D-ERAT multi-hit */
+#define DSISR_MC_TLB_MULTI 0x400 /* D-TLB multi-hit */
+#define DSISR_MC_SLB_PARITY 0x100 /* SLB parity error */
+#define DSISR_MC_SLB_MULTI 0x080 /* SLB multi-hit */
+#define DSISR_MC_SLB_PARMULTI 0x040 /* SLB parity + multi-hit */
+
+/* POWER7 SLB flush and reload */
+static void reload_slb(struct kvm_vcpu *vcpu)
+{
+ struct slb_shadow *slb;
+ unsigned long i, n;
+
+ /* First clear out SLB */
+ asm volatile("slbmte %0,%0; slbia" : : "r" (0));
+
+ /* Do they have an SLB shadow buffer registered? */
+ slb = vcpu->arch.slb_shadow.pinned_addr;
+ if (!slb)
+ return;
+
+ /* Sanity check */
+ n = min_t(u32, slb->persistent, SLB_MIN_SIZE);
+ if ((void *) &slb->save_area[n] > vcpu->arch.slb_shadow.pinned_end)
+ return;
+
+ /* Load up the SLB from that */
+ for (i = 0; i < n; ++i) {
+ unsigned long rb = slb->save_area[i].esid;
+ unsigned long rs = slb->save_area[i].vsid;
+
+ rb = (rb & ~0xFFFul) | i; /* insert entry number */
+ asm volatile("slbmte %0,%1" : : "r" (rs), "r" (rb));
+ }
+}
+
+/* POWER7 TLB flush */
+static void flush_tlb_power7(struct kvm_vcpu *vcpu)
+{
+ unsigned long i, rb;
+
+ rb = TLBIEL_INVAL_SET_LPID;
+ for (i = 0; i < POWER7_TLB_SETS; ++i) {
+ asm volatile("tlbiel %0" : : "r" (rb));
+ rb += 1 << TLBIEL_INVAL_SET_SHIFT;
+ }
+}
+
+/*
+ * On POWER7, see if we can handle a machine check that occurred inside
+ * the guest in real mode, without switching to the host partition.
+ *
+ * Returns: 0 => exit guest, 1 => deliver machine check to guest
+ */
+static long kvmppc_realmode_mc_power7(struct kvm_vcpu *vcpu)
+{
+ unsigned long srr1 = vcpu->arch.shregs.msr;
+ struct opal_machine_check_event *opal_evt;
+ long handled = 1;
+
+ if (srr1 & SRR1_MC_LDSTERR) {
+ /* error on load/store */
+ unsigned long dsisr = vcpu->arch.shregs.dsisr;
+
+ if (dsisr & (DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
+ DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI)) {
+ /* flush and reload SLB; flushes D-ERAT too */
+ reload_slb(vcpu);
+ dsisr &= ~(DSISR_MC_SLB_PARMULTI | DSISR_MC_SLB_MULTI |
+ DSISR_MC_SLB_PARITY | DSISR_MC_DERAT_MULTI);
+ }
+ if (dsisr & DSISR_MC_TLB_MULTI) {
+ flush_tlb_power7(vcpu);
+ dsisr &= ~DSISR_MC_TLB_MULTI;
+ }
+ /* Any other errors we don't understand? */
+ if (dsisr & 0xffffffffUL)
+ handled = 0;
+ }
+
+ switch ((srr1 >> SRR1_MC_IFETCH_SH) & SRR1_MC_IFETCH_MASK) {
+ case 0:
+ break;
+ case SRR1_MC_IFETCH_SLBPAR:
+ case SRR1_MC_IFETCH_SLBMULTI:
+ case SRR1_MC_IFETCH_SLBPARMULTI:
+ reload_slb(vcpu);
+ break;
+ case SRR1_MC_IFETCH_TLBMULTI:
+ flush_tlb_power7(vcpu);
+ break;
+ default:
+ handled = 0;
+ }
+
+ /*
+ * See if OPAL has already handled the condition.
+ * We assume that if the condition is recovered then OPAL
+ * will have generated an error log event that we will pick
+ * up and log later.
+ */
+ opal_evt = local_paca->opal_mc_evt;
+ if (opal_evt->version == OpalMCE_V1 &&
+ (opal_evt->severity == OpalMCE_SEV_NO_ERROR ||
+ opal_evt->disposition == OpalMCE_DISPOSITION_RECOVERED))
+ handled = 1;
+
+ if (handled)
+ opal_evt->in_use = 0;
+
+ return handled;
+}
+
+long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_206))
+ return kvmppc_realmode_mc_power7(vcpu);
+
+ return 0;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index b48bd53..10b6c35 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -27,6 +27,7 @@
#include <asm/asm-offsets.h>
#include <asm/exception-64s.h>
#include <asm/kvm_book3s_asm.h>
+#include <asm/mmu-hash64.h>
/*****************************************************************************
* *
@@ -678,8 +679,7 @@ BEGIN_FTR_SECTION
1:
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-nohpte_cont:
-hcall_real_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
+guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
/* Save DEC */
mfspr r5,SPRN_DEC
mftb r6
@@ -700,6 +700,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
std r6, VCPU_FAULT_DAR(r9)
stw r7, VCPU_FAULT_DSISR(r9)
+ /* See if it is a machine check */
+ cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq machine_check_realmode
+mc_cont:
+
/* Save guest CTRL register, set runlatch to 1 */
6: mfspr r6,SPRN_CTRLF
stw r6,VCPU_CTRL(r9)
@@ -1112,38 +1117,41 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
/*
* For external and machine check interrupts, we need
* to call the Linux handler to process the interrupt.
- * We do that by jumping to the interrupt vector address
- * which we have in r12. The [h]rfid at the end of the
+ * We do that by jumping to absolute address 0x500 for
+ * external interrupts, or the machine_check_fwnmi label
+ * for machine checks (since firmware might have patched
+ * the vector area at 0x200). The [h]rfid at the end of the
* handler will return to the book3s_hv_interrupts.S code.
* For other interrupts we do the rfid to get back
- * to the book3s_interrupts.S code here.
+ * to the book3s_hv_interrupts.S code here.
*/
ld r8, HSTATE_VMHANDLER(r13)
ld r7, HSTATE_HOST_MSR(r13)
+ cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
+BEGIN_FTR_SECTION
beq 11f
- cmpwi r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* RFI into the highmem handler, or branch to interrupt handler */
-12: mfmsr r6
- mtctr r12
+ mfmsr r6
li r0, MSR_RI
andc r6, r6, r0
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
- beqctr
+ beqa 0x500 /* external interrupt (PPC970) */
+ beq cr1, 13f /* machine check */
RFI
-11:
-BEGIN_FTR_SECTION
- b 12b
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
- mtspr SPRN_HSRR0, r8
+ /* On POWER7, we have external interrupts set to use HSRR0/1 */
+11: mtspr SPRN_HSRR0, r8
mtspr SPRN_HSRR1, r7
ba 0x500
+13: b machine_check_fwnmi
+
/*
* Check whether an HDSI is an HPTE not found fault or something else.
* If it is an HPTE not found fault that is due to the guest accessing
@@ -1176,7 +1184,7 @@ kvmppc_hdsi:
cmpdi r3, 0 /* retry the instruction */
beq 6f
cmpdi r3, -1 /* handle in kernel mode */
- beq nohpte_cont
+ beq guest_exit_cont
cmpdi r3, -2 /* MMIO emulation; need instr word */
beq 2f
@@ -1190,6 +1198,7 @@ kvmppc_hdsi:
li r10, BOOK3S_INTERRUPT_DATA_STORAGE
li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
rotldi r11, r11, 63
+fast_interrupt_c_return:
6: ld r7, VCPU_CTR(r9)
lwz r8, VCPU_XER(r9)
mtctr r7
@@ -1222,7 +1231,7 @@ kvmppc_hdsi:
/* Unset guest mode. */
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
- b nohpte_cont
+ b guest_exit_cont
/*
* Similarly for an HISI, reflect it to the guest as an ISI unless
@@ -1248,9 +1257,9 @@ kvmppc_hisi:
ld r11, VCPU_MSR(r9)
li r12, BOOK3S_INTERRUPT_H_INST_STORAGE
cmpdi r3, 0 /* retry the instruction */
- beq 6f
+ beq fast_interrupt_c_return
cmpdi r3, -1 /* handle in kernel mode */
- beq nohpte_cont
+ beq guest_exit_cont
/* Synthesize an ISI for the guest */
mr r11, r3
@@ -1259,12 +1268,7 @@ kvmppc_hisi:
li r10, BOOK3S_INTERRUPT_INST_STORAGE
li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
rotldi r11, r11, 63
-6: ld r7, VCPU_CTR(r9)
- lwz r8, VCPU_XER(r9)
- mtctr r7
- mtxer r8
- mr r4, r9
- b fast_guest_return
+ b fast_interrupt_c_return
3: ld r6, VCPU_KVM(r9) /* not relocated, use VRMA */
ld r5, KVM_VRMA_SLB_V(r6)
@@ -1280,14 +1284,14 @@ kvmppc_hisi:
hcall_try_real_mode:
ld r3,VCPU_GPR(R3)(r9)
andi. r0,r11,MSR_PR
- bne hcall_real_cont
+ bne guest_exit_cont
clrrdi r3,r3,2
cmpldi r3,hcall_real_table_end - hcall_real_table
- bge hcall_real_cont
+ bge guest_exit_cont
LOAD_REG_ADDR(r4, hcall_real_table)
lwzx r3,r3,r4
cmpwi r3,0
- beq hcall_real_cont
+ beq guest_exit_cont
add r3,r3,r4
mtctr r3
mr r3,r9 /* get vcpu pointer */
@@ -1308,7 +1312,7 @@ hcall_real_fallback:
li r12,BOOK3S_INTERRUPT_SYSCALL
ld r9, HSTATE_KVM_VCPU(r13)
- b hcall_real_cont
+ b guest_exit_cont
.globl hcall_real_table
hcall_real_table:
@@ -1567,6 +1571,21 @@ kvm_cede_exit:
li r3,H_TOO_HARD
blr
+ /* Try to handle a machine check in real mode */
+machine_check_realmode:
+ mr r3, r9 /* get vcpu pointer */
+ bl .kvmppc_realmode_machine_check
+ nop
+ cmpdi r3, 0 /* continue exiting from guest? */
+ ld r9, HSTATE_KVM_VCPU(r13)
+ li r12, BOOK3S_INTERRUPT_MACHINE_CHECK
+ beq mc_cont
+ /* If not, deliver a machine check. SRR0/1 are already set */
+ li r10, BOOK3S_INTERRUPT_MACHINE_CHECK
+ li r11, (MSR_ME << 1) | 1 /* synthesize MSR_SF | MSR_ME */
+ rotldi r11, r11, 63
+ b fast_interrupt_c_return
+
secondary_too_late:
ld r5,HSTATE_KVM_VCORE(r13)
HMT_LOW
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 17/28] KVM: PPC: e500: Silence bogus GCC warning in tlb code
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (15 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 16/28] KVM: PPC: Book3S HV: Handle guest-caused machine checks on POWER7 without panicking Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 18/28] KVM: PPC: booke: Fix get_tb() compile error on 64-bit Alexander Graf
` (11 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Mihai Caraman
From: Mihai Caraman <mihai.caraman@freescale.com>
64-bit GCC 4.5.1 warns about an uninitialized variable which was guarded
by a flag. Initialize the variable to make it happy.
Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: reword comment]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/e500_tlb.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 6305ee6..5532bfb 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -415,7 +415,8 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
struct tlbe_ref *ref)
{
struct kvm_memory_slot *slot;
- unsigned long pfn, hva;
+ unsigned long pfn = 0; /* silence GCC warning */
+ unsigned long hva;
int pfnmap = 0;
int tsize = BOOK3E_PAGESZ_4K;
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 18/28] KVM: PPC: booke: Fix get_tb() compile error on 64-bit
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (16 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 17/28] KVM: PPC: e500: Silence bogus GCC warning in tlb code Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 19/28] KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler Alexander Graf
` (10 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Mihai Caraman
From: Mihai Caraman <mihai.caraman@freescale.com>
Include header file for get_tb() declaration.
Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/booke.c | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 3d1f35d..7c9c389 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -36,6 +36,7 @@
#include <asm/dbell.h>
#include <asm/hw_irq.h>
#include <asm/irq.h>
+#include <asm/time.h>
#include "timing.h"
#include "booke.h"
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 19/28] KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (17 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 18/28] KVM: PPC: booke: Fix get_tb() compile error on 64-bit Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 20/28] KVM: PPC: bookehv64: Add support for interrupt handling Alexander Graf
` (9 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Mihai Caraman
From: Mihai Caraman <mihai.caraman@freescale.com>
GET_VCPU define will not be implemented for 64-bit for performance reasons
so get rid of it also on 32-bit.
Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/bookehv_interrupts.S | 7 ++-----
1 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 099fe82..fa6d552 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -32,9 +32,6 @@
#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
-#define GET_VCPU(vcpu, thread) \
- PPC_LL vcpu, THREAD_KVM_VCPU(thread)
-
#define LONGBYTES (BITS_PER_LONG / 8)
#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES))
@@ -206,7 +203,7 @@
*/
.macro kvm_handler intno srr0, srr1, flags
_GLOBAL(kvmppc_handler_\intno\()_\srr1)
- GET_VCPU(r11, r10)
+ PPC_LL r11, THREAD_KVM_VCPU(r10)
PPC_STL r3, VCPU_GPR(R3)(r11)
mfspr r3, SPRN_SPRG_RSCRATCH0
PPC_STL r4, VCPU_GPR(R4)(r11)
@@ -233,7 +230,7 @@ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
.macro kvm_lvl_handler intno scratch srr0, srr1, flags
_GLOBAL(kvmppc_handler_\intno\()_\srr1)
mfspr r10, SPRN_SPRG_THREAD
- GET_VCPU(r11, r10)
+ PPC_LL r11, THREAD_KVM_VCPU(r10)
PPC_STL r3, VCPU_GPR(R3)(r11)
mfspr r3, \scratch
PPC_STL r4, VCPU_GPR(R4)(r11)
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 20/28] KVM: PPC: bookehv64: Add support for interrupt handling
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (18 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 19/28] KVM: PPC: bookehv: Remove GET_VCPU macro from exception handler Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 21/28] KVM: PPC: e500: Add emulation helper for getting instruction ea Alexander Graf
` (8 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Mihai Caraman
From: Mihai Caraman <mihai.caraman@freescale.com>
Add interrupt handling support for 64-bit bookehv hosts. Unify 32 and 64 bit
implementations using a common stack layout and a common execution flow starting
from kvm_handler_common macro. Update documentation for 64-bit input register
values. This patch only address the bolted TLB miss exception handlers version.
Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/kvm_booke_hv_asm.h | 25 +++++
arch/powerpc/kvm/bookehv_interrupts.S | 138 +++++++++++++++++++++++++--
2 files changed, 155 insertions(+), 8 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
index a37a12a..3a79f53 100644
--- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h
+++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h
@@ -17,6 +17,7 @@
* there are no exceptions for which we fall through directly to
* the normal host handler.
*
+ * 32-bit host
* Expected inputs (normal exceptions):
* SCRATCH0 = saved r10
* r10 = thread struct
@@ -33,6 +34,30 @@
* *(r8 + GPR9) = saved r9
* *(r8 + GPR10) = saved r10 (r10 not yet clobbered)
* *(r8 + GPR11) = saved r11
+ *
+ * 64-bit host
+ * Expected inputs (GEN/GDBELL/DBG/MC exception types):
+ * r10 = saved CR
+ * r13 = PACA_POINTER
+ * *(r13 + PACA_EX##type + EX_R10) = saved r10
+ * *(r13 + PACA_EX##type + EX_R11) = saved r11
+ * SPRN_SPRG_##type##_SCRATCH = saved r13
+ *
+ * Expected inputs (CRIT exception type):
+ * r10 = saved CR
+ * r13 = PACA_POINTER
+ * *(r13 + PACA_EX##type + EX_R10) = saved r10
+ * *(r13 + PACA_EX##type + EX_R11) = saved r11
+ * *(r13 + PACA_EX##type + EX_R13) = saved r13
+ *
+ * Expected inputs (TLB exception type):
+ * r10 = saved CR
+ * r13 = PACA_POINTER
+ * *(r13 + PACA_EX##type + EX_TLB_R10) = saved r10
+ * *(r13 + PACA_EX##type + EX_TLB_R11) = saved r11
+ * SPRN_SPRG_GEN_SCRATCH = saved r13
+ *
+ * Only the bolted version of TLB miss exception handlers is supported now.
*/
.macro DO_KVM intno srr1
#ifdef CONFIG_KVM_BOOKE_HV
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index fa6d552..e8ed7d6 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -16,6 +16,7 @@
*
* Author: Varun Sethi <varun.sethi@freescale.com>
* Author: Scott Wood <scotwood@freescale.com>
+ * Author: Mihai Caraman <mihai.caraman@freescale.com>
*
* This file is derived from arch/powerpc/kvm/booke_interrupts.S
*/
@@ -30,28 +31,33 @@
#include <asm/bitsperlong.h>
#include <asm/thread_info.h>
+#ifdef CONFIG_64BIT
+#include <asm/exception-64e.h>
+#else
#include "../kernel/head_booke.h" /* for THREAD_NORMSAVE() */
+#endif
#define LONGBYTES (BITS_PER_LONG / 8)
#define VCPU_GUEST_SPRG(n) (VCPU_GUEST_SPRGS + (n * LONGBYTES))
/* The host stack layout: */
-#define HOST_R1 (0 * LONGBYTES) /* Implied by stwu. */
-#define HOST_CALLEE_LR (1 * LONGBYTES)
-#define HOST_RUN (2 * LONGBYTES) /* struct kvm_run */
+#define HOST_R1 0 /* Implied by stwu. */
+#define HOST_CALLEE_LR PPC_LR_STKOFF
+#define HOST_RUN (HOST_CALLEE_LR + LONGBYTES)
/*
* r2 is special: it holds 'current', and it made nonvolatile in the
* kernel with the -ffixed-r2 gcc option.
*/
-#define HOST_R2 (3 * LONGBYTES)
-#define HOST_CR (4 * LONGBYTES)
-#define HOST_NV_GPRS (5 * LONGBYTES)
+#define HOST_R2 (HOST_RUN + LONGBYTES)
+#define HOST_CR (HOST_R2 + LONGBYTES)
+#define HOST_NV_GPRS (HOST_CR + LONGBYTES)
#define __HOST_NV_GPR(n) (HOST_NV_GPRS + ((n - 14) * LONGBYTES))
#define HOST_NV_GPR(n) __HOST_NV_GPR(__REG_##n)
#define HOST_MIN_STACK_SIZE (HOST_NV_GPR(R31) + LONGBYTES)
#define HOST_STACK_SIZE ((HOST_MIN_STACK_SIZE + 15) & ~15) /* Align. */
-#define HOST_STACK_LR (HOST_STACK_SIZE + LONGBYTES) /* In caller stack frame. */
+/* LR in caller stack frame. */
+#define HOST_STACK_LR (HOST_STACK_SIZE + PPC_LR_STKOFF)
#define NEED_EMU 0x00000001 /* emulation -- save nv regs */
#define NEED_DEAR 0x00000002 /* save faulting DEAR */
@@ -198,6 +204,122 @@
b kvmppc_resume_host
.endm
+#ifdef CONFIG_64BIT
+/* Exception types */
+#define EX_GEN 1
+#define EX_GDBELL 2
+#define EX_DBG 3
+#define EX_MC 4
+#define EX_CRIT 5
+#define EX_TLB 6
+
+/*
+ * For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
+ */
+.macro kvm_handler intno type scratch, paca_ex, ex_r10, ex_r11, srr0, srr1, flags
+ _GLOBAL(kvmppc_handler_\intno\()_\srr1)
+ mr r11, r4
+ /*
+ * Get vcpu from Paca: paca->__current.thread->kvm_vcpu
+ */
+ PPC_LL r4, PACACURRENT(r13)
+ PPC_LL r4, (THREAD + THREAD_KVM_VCPU)(r4)
+ stw r10, VCPU_CR(r4)
+ PPC_STL r11, VCPU_GPR(R4)(r4)
+ PPC_STL r5, VCPU_GPR(R5)(r4)
+ .if \type == EX_CRIT
+ PPC_LL r5, (\paca_ex + EX_R13)(r13)
+ .else
+ mfspr r5, \scratch
+ .endif
+ PPC_STL r6, VCPU_GPR(R6)(r4)
+ PPC_STL r8, VCPU_GPR(R8)(r4)
+ PPC_STL r9, VCPU_GPR(R9)(r4)
+ PPC_STL r5, VCPU_GPR(R13)(r4)
+ PPC_LL r6, (\paca_ex + \ex_r10)(r13)
+ PPC_LL r8, (\paca_ex + \ex_r11)(r13)
+ PPC_STL r3, VCPU_GPR(R3)(r4)
+ PPC_STL r7, VCPU_GPR(R7)(r4)
+ PPC_STL r12, VCPU_GPR(R12)(r4)
+ PPC_STL r6, VCPU_GPR(R10)(r4)
+ PPC_STL r8, VCPU_GPR(R11)(r4)
+ mfctr r5
+ PPC_STL r5, VCPU_CTR(r4)
+ mfspr r5, \srr0
+ mfspr r6, \srr1
+ kvm_handler_common \intno, \srr0, \flags
+.endm
+
+#define EX_PARAMS(type) \
+ EX_##type, \
+ SPRN_SPRG_##type##_SCRATCH, \
+ PACA_EX##type, \
+ EX_R10, \
+ EX_R11
+
+#define EX_PARAMS_TLB \
+ EX_TLB, \
+ SPRN_SPRG_GEN_SCRATCH, \
+ PACA_EXTLB, \
+ EX_TLB_R10, \
+ EX_TLB_R11
+
+kvm_handler BOOKE_INTERRUPT_CRITICAL, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_MACHINE_CHECK, EX_PARAMS(MC), \
+ SPRN_MCSRR0, SPRN_MCSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1,(NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_INST_STORAGE, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, NEED_ESR
+kvm_handler BOOKE_INTERRUPT_EXTERNAL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_ALIGNMENT, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1,(NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_PROGRAM, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1,NEED_ESR
+kvm_handler BOOKE_INTERRUPT_FP_UNAVAIL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_AP_UNAVAIL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DECREMENTER, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_FIT, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_WATCHDOG, EX_PARAMS(CRIT),\
+ SPRN_CSRR0, SPRN_CSRR1, 0
+/*
+ * Only bolted TLB miss exception handlers are supported for now
+ */
+kvm_handler BOOKE_INTERRUPT_DTLB_MISS, EX_PARAMS_TLB, \
+ SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
+kvm_handler BOOKE_INTERRUPT_ITLB_MISS, EX_PARAMS_TLB, \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_UNAVAIL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_DATA, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_SPE_FP_ROUND, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_PERFORMANCE_MONITOR, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_DOORBELL_CRITICAL, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_HV_PRIV, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, NEED_EMU
+kvm_handler BOOKE_INTERRUPT_HV_SYSCALL, EX_PARAMS(GEN), \
+ SPRN_SRR0, SPRN_SRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL, EX_PARAMS(GDBELL), \
+ SPRN_GSRR0, SPRN_GSRR1, 0
+kvm_handler BOOKE_INTERRUPT_GUEST_DBELL_CRIT, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(DBG), \
+ SPRN_DSRR0, SPRN_DSRR1, 0
+kvm_handler BOOKE_INTERRUPT_DEBUG, EX_PARAMS(CRIT), \
+ SPRN_CSRR0, SPRN_CSRR1, 0
+#else
/*
* For input register values, see arch/powerpc/include/asm/kvm_booke_hv_asm.h
*/
@@ -292,7 +414,7 @@ kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
SPRN_SPRG_RSCRATCH_CRIT, SPRN_CSRR0, SPRN_CSRR1, 0
kvm_lvl_handler BOOKE_INTERRUPT_DEBUG, \
SPRN_SPRG_RSCRATCH_DBG, SPRN_DSRR0, SPRN_DSRR1, 0
-
+#endif
/* Registers:
* SPRG_SCRATCH0: guest r10
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 21/28] KVM: PPC: e500: Add emulation helper for getting instruction ea
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (19 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 20/28] KVM: PPC: bookehv64: Add support for interrupt handling Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 22/28] KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation Alexander Graf
` (7 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Mihai Caraman
From: Mihai Caraman <mihai.caraman@freescale.com>
Add emulation helper for getting instruction ea and refactor tlb instruction
emulation to use it.
Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: keep rt variable around]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/kvm_ppc.h | 11 +++++++++++
arch/powerpc/kvm/e500.h | 6 +++---
arch/powerpc/kvm/e500_emulate.c | 14 ++++++++++----
arch/powerpc/kvm/e500_tlb.c | 33 +++++++++++----------------------
4 files changed, 35 insertions(+), 29 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 1ca31e9..d55a2b2 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -295,4 +295,15 @@ static inline void kvmppc_lazy_ee_enable(void)
#endif
}
+static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
+{
+ ulong ea;
+
+ ea = kvmppc_get_gpr(vcpu, rb);
+ if (ra)
+ ea += kvmppc_get_gpr(vcpu, ra);
+
+ return ea;
+}
+
#endif /* __POWERPC_KVM_PPC_H__ */
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index d162286..32e98a7 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -129,9 +129,9 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500,
ulong value);
int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu);
int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu);
-int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb);
-int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb);
-int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb);
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea);
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea);
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea);
int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500);
void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500);
diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c
index e04b0ef..e78f353 100644
--- a/arch/powerpc/kvm/e500_emulate.c
+++ b/arch/powerpc/kvm/e500_emulate.c
@@ -89,6 +89,7 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
int ra = get_ra(inst);
int rb = get_rb(inst);
int rt = get_rt(inst);
+ gva_t ea;
switch (get_op(inst)) {
case 31:
@@ -113,15 +114,20 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case XOP_TLBSX:
- emulated = kvmppc_e500_emul_tlbsx(vcpu,rb);
+ ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+ emulated = kvmppc_e500_emul_tlbsx(vcpu, ea);
break;
- case XOP_TLBILX:
- emulated = kvmppc_e500_emul_tlbilx(vcpu, rt, ra, rb);
+ case XOP_TLBILX: {
+ int type = rt & 0x3;
+ ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+ emulated = kvmppc_e500_emul_tlbilx(vcpu, type, ea);
break;
+ }
case XOP_TLBIVAX:
- emulated = kvmppc_e500_emul_tlbivax(vcpu, ra, rb);
+ ea = kvmppc_get_ea_indexed(vcpu, ra, rb);
+ emulated = kvmppc_e500_emul_tlbivax(vcpu, ea);
break;
default:
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 5532bfb..7a14721 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -689,14 +689,11 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value)
return EMULATE_DONE;
}
-int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
+int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
unsigned int ia;
int esel, tlbsel;
- gva_t ea;
-
- ea = ((ra) ? kvmppc_get_gpr(vcpu, ra) : 0) + kvmppc_get_gpr(vcpu, rb);
ia = (ea >> 2) & 0x1;
@@ -723,7 +720,7 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, int ra, int rb)
}
static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
- int pid, int rt)
+ int pid, int type)
{
struct kvm_book3e_206_tlb_entry *tlbe;
int tid, esel;
@@ -732,7 +729,7 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
for (esel = 0; esel < vcpu_e500->gtlb_params[tlbsel].entries; esel++) {
tlbe = get_entry(vcpu_e500, tlbsel, esel);
tid = get_tlb_tid(tlbe);
- if (rt == 0 || tid == pid) {
+ if (type == 0 || tid == pid) {
inval_gtlbe_on_host(vcpu_e500, tlbsel, esel);
kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel);
}
@@ -740,14 +737,9 @@ static void tlbilx_all(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel,
}
static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
- int ra, int rb)
+ gva_t ea)
{
int tlbsel, esel;
- gva_t ea;
-
- ea = kvmppc_get_gpr(&vcpu_e500->vcpu, rb);
- if (ra)
- ea += kvmppc_get_gpr(&vcpu_e500->vcpu, ra);
for (tlbsel = 0; tlbsel < 2; tlbsel++) {
esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, -1);
@@ -759,16 +751,16 @@ static void tlbilx_one(struct kvmppc_vcpu_e500 *vcpu_e500, int pid,
}
}
-int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int rt, int ra, int rb)
+int kvmppc_e500_emul_tlbilx(struct kvm_vcpu *vcpu, int type, gva_t ea)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int pid = get_cur_spid(vcpu);
- if (rt == 0 || rt == 1) {
- tlbilx_all(vcpu_e500, 0, pid, rt);
- tlbilx_all(vcpu_e500, 1, pid, rt);
- } else if (rt == 3) {
- tlbilx_one(vcpu_e500, pid, ra, rb);
+ if (type == 0 || type == 1) {
+ tlbilx_all(vcpu_e500, 0, pid, type);
+ tlbilx_all(vcpu_e500, 1, pid, type);
+ } else if (type == 3) {
+ tlbilx_one(vcpu_e500, pid, ea);
}
return EMULATE_DONE;
@@ -793,16 +785,13 @@ int kvmppc_e500_emul_tlbre(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
-int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, int rb)
+int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea)
{
struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu);
int as = !!get_cur_sas(vcpu);
unsigned int pid = get_cur_spid(vcpu);
int esel, tlbsel;
struct kvm_book3e_206_tlb_entry *gtlbe = NULL;
- gva_t ea;
-
- ea = kvmppc_get_gpr(vcpu, rb);
for (tlbsel = 0; tlbsel < 2; tlbsel++) {
esel = kvmppc_e500_tlb_index(vcpu_e500, ea, tlbsel, pid, as);
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 22/28] KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (20 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 21/28] KVM: PPC: e500: Add emulation helper for getting instruction ea Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 23/28] KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation Alexander Graf
` (6 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Mihai Caraman
From: Mihai Caraman <mihai.caraman@freescale.com>
Mask high 32 bits of effective address in emulation layer for guests running
in 32-bit mode.
Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: fix indent]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/kvm_ppc.h | 10 ++++++++++
1 files changed, 10 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index d55a2b2..572aa75 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -298,11 +298,21 @@ static inline void kvmppc_lazy_ee_enable(void)
static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
{
ulong ea;
+ ulong msr_64bit = 0;
ea = kvmppc_get_gpr(vcpu, rb);
if (ra)
ea += kvmppc_get_gpr(vcpu, ra);
+#if defined(CONFIG_PPC_BOOK3E_64)
+ msr_64bit = MSR_CM;
+#elif defined(CONFIG_PPC_BOOK3S_64)
+ msr_64bit = MSR_SF;
+#endif
+
+ if (!(vcpu->arch.shared->msr & msr_64bit))
+ ea = (uint32_t)ea;
+
return ea;
}
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 23/28] KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (21 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 22/28] KVM: PPC: Mask ea's high 32-bits in 32/64 instr emulation Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 24/28] KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit Alexander Graf
` (5 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Mihai Caraman
From: Mihai Caraman <mihai.caraman@freescale.com>
Mask high 32 bits of MAS2's effective page number in tlbwe emulation for guests
running in 32-bit mode.
Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/e500_tlb.c | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index 7a14721..cf3f180 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -871,6 +871,8 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu)
gtlbe->mas1 = vcpu->arch.shared->mas1;
gtlbe->mas2 = vcpu->arch.shared->mas2;
+ if (!(vcpu->arch.shared->msr & MSR_CM))
+ gtlbe->mas2 &= 0xffffffffUL;
gtlbe->mas7_3 = vcpu->arch.shared->mas7_3;
trace_kvm_booke206_gtlb_write(vcpu->arch.shared->mas0, gtlbe->mas1,
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 24/28] KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (22 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 23/28] KVM: PPC: e500: Mask MAS2 EPN high 32-bits in 32/64 tlbwe emulation Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 25/28] KVM: PPC: Make EPCR a valid field for booke64 and bookehv Alexander Graf
` (4 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Mihai Caraman
From: Mihai Caraman <mihai.caraman@freescale.com>
Extend MAS2 EPN mask to retain most significant bits on 64-bit hosts.
Use this mask in tlb effective address accessor.
Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/mmu-book3e.h | 2 +-
arch/powerpc/kvm/e500.h | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h
index eeabcdb..99d43e0 100644
--- a/arch/powerpc/include/asm/mmu-book3e.h
+++ b/arch/powerpc/include/asm/mmu-book3e.h
@@ -59,7 +59,7 @@
#define MAS1_TSIZE_SHIFT 7
#define MAS1_TSIZE(x) (((x) << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK)
-#define MAS2_EPN 0xFFFFF000
+#define MAS2_EPN (~0xFFFUL)
#define MAS2_X0 0x00000040
#define MAS2_X1 0x00000020
#define MAS2_W 0x00000010
diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h
index 32e98a7..c70d37e 100644
--- a/arch/powerpc/kvm/e500.h
+++ b/arch/powerpc/kvm/e500.h
@@ -154,7 +154,7 @@ get_tlb_size(const struct kvm_book3e_206_tlb_entry *tlbe)
static inline gva_t get_tlb_eaddr(const struct kvm_book3e_206_tlb_entry *tlbe)
{
- return tlbe->mas2 & 0xfffff000;
+ return tlbe->mas2 & MAS2_EPN;
}
static inline u64 get_tlb_bytes(const struct kvm_book3e_206_tlb_entry *tlbe)
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 25/28] KVM: PPC: Make EPCR a valid field for booke64 and bookehv
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (23 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 24/28] KVM: PPC: booke: Extend MAS2 EPN mask for 64-bit Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 26/28] KVM: PPC: bookehv: Add guest computation mode for irq delivery Alexander Graf
` (3 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list
In BookE, EPCR is defined and valid when either the HV or the 64bit
category are implemented. Reflect this in the field definition.
Today the only KVM target on 64bit is HV enabled, so there is no
change in actual source code, but this keeps the code closer to the
spec and doesn't build up artificial road blocks for a PR KVM
on 64bit.
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/include/asm/kvm_host.h | 7 ++++++-
1 files changed, 6 insertions(+), 1 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 62fbd38..ca9bf45 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -406,13 +406,18 @@ struct kvm_vcpu_arch {
u32 host_mas4;
u32 host_mas6;
u32 shadow_epcr;
- u32 epcr;
u32 shadow_msrp;
u32 eplc;
u32 epsc;
u32 oldpir;
#endif
+#if defined(CONFIG_BOOKE)
+#if defined(CONFIG_KVM_BOOKE_HV) || defined(CONFIG_64BIT)
+ u32 epcr;
+#endif
+#endif
+
#ifdef CONFIG_PPC_BOOK3S
/* For Gekko paired singles */
u32 qpr[32];
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 26/28] KVM: PPC: bookehv: Add guest computation mode for irq delivery
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (24 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 25/28] KVM: PPC: Make EPCR a valid field for booke64 and bookehv Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 27/28] KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation Alexander Graf
` (2 subsequent siblings)
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Mihai Caraman
From: Mihai Caraman <mihai.caraman@freescale.com>
When delivering guest IRQs, update MSR computation mode according to guest
interrupt computation mode found in EPCR.
Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: remove HV dependency in the code]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/booke.c | 9 ++++++++-
1 files changed, 8 insertions(+), 1 deletions(-)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7c9c389..9457fb1 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -312,6 +312,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
bool crit;
bool keep_irq = false;
enum int_class int_class;
+ ulong new_msr = vcpu->arch.shared->msr;
/* Truncate crit indicators in 32 bit mode */
if (!(vcpu->arch.shared->msr & MSR_SF)) {
@@ -407,7 +408,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu,
set_guest_esr(vcpu, vcpu->arch.queued_esr);
if (update_dear == true)
set_guest_dear(vcpu, vcpu->arch.queued_dear);
- kvmppc_set_msr(vcpu, vcpu->arch.shared->msr & msr_mask);
+
+ new_msr &= msr_mask;
+#if defined(CONFIG_64BIT)
+ if (vcpu->arch.epcr & SPRN_EPCR_ICM)
+ new_msr |= MSR_CM;
+#endif
+ kvmppc_set_msr(vcpu, new_msr);
if (!keep_irq)
clear_bit(priority, &vcpu->arch.pending_exceptions);
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 27/28] KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (25 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 26/28] KVM: PPC: bookehv: Add guest computation mode for irq delivery Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-06 0:41 ` [PATCH 28/28] KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface Alexander Graf
2012-12-09 20:44 ` [PULL 00/28] ppc patch queue 2012-12-06 Marcelo Tosatti
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Mihai Caraman
From: Mihai Caraman <mihai.caraman@freescale.com>
Add EPCR support in booke mtspr/mfspr emulation. EPCR register is defined only
for 64-bit and HV categories, we will expose it at this point only to 64-bit
virtual processors running on 64-bit HV hosts.
Define a reusable setter function for vcpu's EPCR.
Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: move HV dependency in the code]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
arch/powerpc/kvm/booke.c | 12 ++++++++++++
arch/powerpc/kvm/booke.h | 1 +
arch/powerpc/kvm/booke_emulate.c | 14 +++++++++++++-
3 files changed, 26 insertions(+), 1 deletions(-)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 9457fb1..037d045 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1473,6 +1473,18 @@ void kvmppc_core_flush_memslot(struct kvm *kvm, struct kvm_memory_slot *memslot)
{
}
+void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr)
+{
+#if defined(CONFIG_64BIT)
+ vcpu->arch.epcr = new_epcr;
+#ifdef CONFIG_KVM_BOOKE_HV
+ vcpu->arch.shadow_epcr &= ~SPRN_EPCR_GICM;
+ if (vcpu->arch.epcr & SPRN_EPCR_ICM)
+ vcpu->arch.shadow_epcr |= SPRN_EPCR_GICM;
+#endif
+#endif
+}
+
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
{
vcpu->arch.tcr = new_tcr;
diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
index ba61974..e9b88e4 100644
--- a/arch/powerpc/kvm/booke.h
+++ b/arch/powerpc/kvm/booke.h
@@ -69,6 +69,7 @@ extern unsigned long kvmppc_booke_handlers;
void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr);
void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr);
+void kvmppc_set_epcr(struct kvm_vcpu *vcpu, u32 new_epcr);
void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr);
void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits);
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 514790f..4685b8c 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -240,7 +240,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_MCSR:
vcpu->arch.mcsr &= ~spr_val;
break;
-
+#if defined(CONFIG_64BIT)
+ case SPRN_EPCR:
+ kvmppc_set_epcr(vcpu, spr_val);
+#ifdef CONFIG_KVM_BOOKE_HV
+ mtspr(SPRN_EPCR, vcpu->arch.shadow_epcr);
+#endif
+ break;
+#endif
default:
emulated = EMULATE_FAIL;
}
@@ -335,6 +342,11 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_MCSR:
*spr_val = vcpu->arch.mcsr;
break;
+#if defined(CONFIG_64BIT)
+ case SPRN_EPCR:
+ *spr_val = vcpu->arch.epcr;
+ break;
+#endif
default:
emulated = EMULATE_FAIL;
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* [PATCH 28/28] KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (26 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 27/28] KVM: PPC: bookehv: Add EPCR support in mtspr/mfspr emulation Alexander Graf
@ 2012-12-06 0:41 ` Alexander Graf
2012-12-09 20:44 ` [PULL 00/28] ppc patch queue 2012-12-06 Marcelo Tosatti
28 siblings, 0 replies; 30+ messages in thread
From: Alexander Graf @ 2012-12-06 0:41 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: Gleb Natapov, kvm-ppc, KVM list, Mihai Caraman
From: Mihai Caraman <mihai.caraman@freescale.com>
Implement ONE_REG interface for EPCR register adding KVM_REG_PPC_EPCR to
the list of ONE_REG PPC supported registers.
Signed-off-by: Mihai Caraman <mihai.caraman@freescale.com>
[agraf: remove HV dependency, use get/put_user]
Signed-off-by: Alexander Graf <agraf@suse.de>
---
Documentation/virtual/kvm/api.txt | 1 +
arch/powerpc/include/uapi/asm/kvm.h | 2 ++
arch/powerpc/kvm/booke.c | 14 ++++++++++++++
3 files changed, 17 insertions(+), 0 deletions(-)
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a5607c5..a4df553 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1773,6 +1773,7 @@ registers, find a list below:
PPC | KVM_REG_PPC_VPA_ADDR | 64
PPC | KVM_REG_PPC_VPA_SLB | 128
PPC | KVM_REG_PPC_VPA_DTL | 128
+ PPC | KVM_REG_PPC_EPCR | 32
4.69 KVM_GET_ONE_REG
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 514883d..2fba8a6 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -411,4 +411,6 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_VPA_SLB (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x83)
#define KVM_REG_PPC_VPA_DTL (KVM_REG_PPC | KVM_REG_SIZE_U128 | 0x84)
+#define KVM_REG_PPC_EPCR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x85)
+
#endif /* __LINUX_KVM_POWERPC_H */
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 037d045..69f1140 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1388,6 +1388,11 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
&vcpu->arch.dbg_reg.dac[dac], sizeof(u64));
break;
}
+#if defined(CONFIG_64BIT)
+ case KVM_REG_PPC_EPCR:
+ r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr);
+ break;
+#endif
default:
break;
}
@@ -1415,6 +1420,15 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
(u64 __user *)(long)reg->addr, sizeof(u64));
break;
}
+#if defined(CONFIG_64BIT)
+ case KVM_REG_PPC_EPCR: {
+ u32 new_epcr;
+ r = get_user(new_epcr, (u32 __user *)(long)reg->addr);
+ if (r == 0)
+ kvmppc_set_epcr(vcpu, new_epcr);
+ break;
+ }
+#endif
default:
break;
}
--
1.6.0.2
^ permalink raw reply related [flat|nested] 30+ messages in thread* Re: [PULL 00/28] ppc patch queue 2012-12-06
2012-12-06 0:40 [PULL 00/28] ppc patch queue 2012-12-06 Alexander Graf
` (27 preceding siblings ...)
2012-12-06 0:41 ` [PATCH 28/28] KVM: PPC: booke: Get/set guest EPCR register using ONE_REG interface Alexander Graf
@ 2012-12-09 20:44 ` Marcelo Tosatti
28 siblings, 0 replies; 30+ messages in thread
From: Marcelo Tosatti @ 2012-12-09 20:44 UTC (permalink / raw)
To: Alexander Graf; +Cc: Gleb Natapov, kvm-ppc, KVM list
On Thu, Dec 06, 2012 at 01:40:49AM +0100, Alexander Graf wrote:
> Hi Marcelo / Gleb,
>
> This is my current patch queue for ppc. Please pull.
>
> Major changes this time around include:
>
> - Book3S PR: Multiple fixes for POWER7 hosts
> - Book3S HV: Prepare for full state exposure (live migration)
> - Add eventfd support
> - BookE: Add final bits for 64bit BookE support
>
> Alex
Pulled, thanks.
^ permalink raw reply [flat|nested] 30+ messages in thread