* Re: [PATCH v2 16/37] KVM: PPC: Book3S HV P9: Stop handling hcalls in real-mode in the P9 path
From: Cédric Le Goater @ 2021-02-25 14:51 UTC (permalink / raw)
To: Nicholas Piggin, kvm-ppc; +Cc: linuxppc-dev
In-Reply-To: <20210225134652.2127648-17-npiggin@gmail.com>
On 2/25/21 2:46 PM, Nicholas Piggin wrote:
> In the interest of minimising the amount of code that is run in
> "real-mode", don't handle hcalls in real mode in the P9 path.
>
> POWER8 and earlier are much more expensive to exit from HV real mode
> and switch to host mode, because on those processors HV interrupts get
> to the hypervisor with the MMU off, and the other threads in the core
> need to be pulled out of the guest, and SLBs all need to be saved,
> ERATs invalidated, and host SLB reloaded before the MMU is re-enabled
> in host mode. Hash guests also require a lot of hcalls to run. The
> XICS interrupt controller requires hcalls to run.
>
> By contrast, POWER9 has independent thread switching, and in radix mode
> the hypervisor is already in a host virtual memory mode when the HV
> interrupt is taken. Radix + xive guests don't need hcalls to handle
> interrupts or manage translations.
>
> So it's much less important to handle hcalls in real mode in P9.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
> arch/powerpc/include/asm/kvm_ppc.h | 5 +++++
> arch/powerpc/kvm/book3s_hv.c | 25 ++++++++++++++++++++++---
> arch/powerpc/kvm/book3s_hv_rmhandlers.S | 5 +++++
> arch/powerpc/kvm/book3s_xive.c | 25 +++++++++++++++++++++++++
> 4 files changed, 57 insertions(+), 3 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
> index 73b1ca5a6471..db6646c2ade2 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -607,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm);
> extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
> extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
> extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
> +extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
> extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
> extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
> extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
> @@ -639,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
> static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
> static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
> { return 0; }
> +static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
> + { return 0; }
> #endif
>
> #ifdef CONFIG_KVM_XIVE
> @@ -673,6 +676,7 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
> int level, bool line_status);
> extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
> extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
> +extern void kvmppc_xive_cede_vcpu(struct kvm_vcpu *vcpu);
I cannot find this routine. Is it missing, or is it coming later in the patchset?
C.
>
> static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
> {
> @@ -714,6 +718,7 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir
> int level, bool line_status) { return -ENODEV; }
> static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
> static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
> +static inline void kvmppc_xive_cede_vcpu(struct kvm_vcpu *vcpu) { }
>
> static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
> { return 0; }
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 7e23838b7f9b..d4770b222d7e 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -1144,7 +1144,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
> * This has to be done early, not in kvmppc_pseries_do_hcall(), so
> * that the cede logic in kvmppc_run_single_vcpu() works properly.
> */
> -static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
> +static void kvmppc_cede(struct kvm_vcpu *vcpu)
> {
> vcpu->arch.shregs.msr |= MSR_EE;
> vcpu->arch.ceded = 1;
> @@ -3731,15 +3731,34 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
> /* H_CEDE has to be handled now, not later */
> if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
> kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
> - kvmppc_nested_cede(vcpu);
> + kvmppc_cede(vcpu);
> kvmppc_set_gpr(vcpu, 3, 0);
> trap = 0;
> }
> } else {
> kvmppc_xive_push_vcpu(vcpu);
> trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
> - kvmppc_xive_pull_vcpu(vcpu);
> + /* H_CEDE has to be handled now, not later */
> + /* XICS hcalls must be handled before xive is pulled */
> + if (trap == BOOK3S_INTERRUPT_SYSCALL &&
> + !(vcpu->arch.shregs.msr & MSR_PR)) {
> + unsigned long req = kvmppc_get_gpr(vcpu, 3);
>
> + if (req == H_CEDE) {
> + kvmppc_cede(vcpu);
> + kvmppc_xive_cede_vcpu(vcpu); /* may un-cede */
> + kvmppc_set_gpr(vcpu, 3, 0);
> + trap = 0;
> + }
> + if (req == H_EOI || req == H_CPPR || req == H_IPI ||
> + req == H_IPOLL || req == H_XIRR || req == H_XIRR_X) {
> + unsigned long ret;
> + ret = kvmppc_xive_xics_hcall(vcpu, req);
> + kvmppc_set_gpr(vcpu, 3, ret);
> + trap = 0;
> + }
> + }
> + kvmppc_xive_pull_vcpu(vcpu);
> }
>
> vcpu->arch.slb_max = 0;
> diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> index c11597f815e4..2d0d14ed1d92 100644
> --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
> @@ -1397,9 +1397,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
> mr r4,r9
> bge fast_guest_return
> 2:
> + /* If we came in through the P9 short path, no real mode hcalls */
> + lwz r0, STACK_SLOT_SHORT_PATH(r1)
> + cmpwi r0, 0
> + bne no_try_real
> /* See if this is an hcall we can handle in real mode */
> cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
> beq hcall_try_real_mode
> +no_try_real:
>
> /* Hypervisor doorbell - exit only if host IPI flag set */
> cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
> diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
> index 8632fb998a55..d2266d36a7c7 100644
> --- a/arch/powerpc/kvm/book3s_xive.c
> +++ b/arch/powerpc/kvm/book3s_xive.c
> @@ -2109,6 +2109,31 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
> return 0;
> }
>
> +int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
> +{
> + struct kvmppc_vcore *vc = vcpu->arch.vcore;
> +
> + switch (req) {
> + case H_XIRR:
> + return xive_vm_h_xirr(vcpu);
> + case H_CPPR:
> + return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
> + case H_EOI:
> + return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
> + case H_IPI:
> + return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
> + kvmppc_get_gpr(vcpu, 5));
> + case H_IPOLL:
> + return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
> + case H_XIRR_X:
> + xive_vm_h_xirr(vcpu);
> + kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset);
> + return H_SUCCESS;
> + }
> +
> + return H_UNSUPPORTED;
> +}
> +
> int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
> {
> struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
>
^ permalink raw reply
* Re: [PATCH v2] vio: make remove callback return void
From: Arnd Bergmann @ 2021-02-25 14:01 UTC (permalink / raw)
To: Michael Ellerman
Cc: Cristobal Forno, Tyrel Datwyler, sparclinux, target-devel,
Paul Mackerras, Breno Leitão, Peter Huewe,
Sukadev Bhattiprolu, Jiri Slaby, Herbert Xu, linux-scsi,
Nayna Jain, Jason Gunthorpe, Michael Cyr, Jakub Kicinski,
Uwe Kleine-König, James E.J. Bottomley, linux-block,
Lijun Pan, Matt Mackall, Jens Axboe, Steven Royer,
Martin K. Petersen, Greg Kroah-Hartman,
linux-kernel@vger.kernel.org, Jarkko Sakkinen, Arnd Bergmann,
open list:HARDWARE RANDOM NUMBER GENERATOR CORE, Networking,
Dany Madden, Paulo Flabiano Smorigo, linux-integrity,
linuxppc-dev, David S. Miller
In-Reply-To: <87sg5ks6xp.fsf@mpe.ellerman.id.au>
On Thu, Feb 25, 2021 at 12:52 PM Michael Ellerman <mpe@ellerman.id.au> wrote:
>
> Uwe Kleine-König <uwe@kleine-koenig.org> writes:
> > The driver core ignores the return value of struct bus_type::remove()
> > because there is little that can be done. To simplify the quest to
> > make this function return void, let struct vio_driver::remove() return
> > void, too. All users already unconditionally return 0; this commit makes
> > it obvious that returning an error code is a bad idea and makes it
> > obvious for future driver authors that returning an error code isn't
> > intended.
> >
> > Note there are two nominally different implementations for a vio bus:
> > one in arch/sparc/kernel/vio.c and the other in
> > arch/powerpc/platforms/pseries/vio.c. I didn't care to check which
> > driver is using which of these busses (or if even some of them can be
> > used with both) and simply adapt all drivers and the two bus codes in
> > one go.
>
> I'm 99% sure there's no connection between the two implementations,
> other than the name.
>
> So splitting the patch by arch would make it easier to merge. I'm
> reluctant to merge changes to sparc code.
The sparc subsystem clearly started out as a copy of the powerpc
version, and serves roughly the same purpose, but the communication
with the hypervisor is quite different.
As there are only four drivers for the sparc vio subsystem:
drivers/block/sunvdc.c
drivers/net/ethernet/sun/ldmvsw.c
drivers/net/ethernet/sun/sunvnet.c
drivers/tty/vcc.c
maybe it would make sense to rename those to use distinct
identifiers now?
Arnd
^ permalink raw reply
* [PATCH v2 37/37] KVM: PPC: Book3S HV: remove POWER9 support from P7/8 paths
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
This is dead code now.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 27 +-
arch/powerpc/kvm/book3s_hv_interrupts.S | 9 +-
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 404 +-----------------------
arch/powerpc/platforms/powernv/idle.c | 52 +--
4 files changed, 23 insertions(+), 469 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 10d5c7ea80ca..483a1a821ea4 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3040,8 +3040,7 @@ static void prepare_threads(struct kvmppc_vcore *vc)
for_each_runnable_thread(i, vcpu, vc) {
if (signal_pending(vcpu->arch.run_task))
vcpu->arch.ret = -EINTR;
- else if (no_mixing_hpt_and_radix &&
- kvm_is_radix(vc->kvm) != radix_enabled())
+ else if (kvm_is_radix(vc->kvm))
vcpu->arch.ret = -EINVAL;
else if (vcpu->arch.vpa.update_pending ||
vcpu->arch.slb_shadow.update_pending ||
@@ -3249,6 +3248,9 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
int trap;
bool is_power8;
+ if (WARN_ON_ONCE(cpu_has_feature(CPU_FTR_ARCH_300)))
+ return;
+
/*
* Remove from the list any threads that have a signal pending
* or need a VPA update done
@@ -3276,9 +3278,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
* Make sure we are running on primary threads, and that secondary
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
- * On POWER9, we need to be not in independent-threads mode if
- * this is a HPT guest on a radix host machine where the
- * CPU threads may not be in different MMU modes.
*/
if ((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
@@ -3302,18 +3301,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
if (vc->num_threads < target_threads)
collect_piggybacks(&core_info, target_threads);
- /*
- * On radix, arrange for TLB flushing if necessary.
- * This has to be done before disabling interrupts since
- * it uses smp_call_function().
- */
- pcpu = smp_processor_id();
- if (kvm_is_radix(vc->kvm)) {
- for (sub = 0; sub < core_info.n_subcores; ++sub)
- for_each_runnable_thread(i, vcpu, core_info.vc[sub])
- kvmppc_prepare_radix_vcpu(vcpu, pcpu);
- }
-
/*
* Hard-disable interrupts, and check resched flag and signals.
* If we need to reschedule or deliver a signal, clean up
@@ -3346,8 +3333,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
cmd_bit = stat_bit = 0;
split = core_info.n_subcores;
sip = NULL;
- is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S)
- && !cpu_has_feature(CPU_FTR_ARCH_300);
+ is_power8 = cpu_has_feature(CPU_FTR_ARCH_207S);
if (split > 1) {
sip = &split_info;
@@ -3630,8 +3616,7 @@ static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
}
/*
- * Virtual-mode guest entry for POWER9 and later when the host and
- * guest are both using the radix MMU. The LPIDR has already been set.
+ * Guest entry for POWER9 and later CPUs.
*/
static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 327417d79eac..4444f83cb133 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -58,7 +58,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/*
* Put whatever is in the decrementer into the
* hypervisor decrementer.
- * Because of a hardware deviation in P8 and P9,
+ * Because of a hardware deviation in P8,
* we need to set LPCR[HDICE] before writing HDEC.
*/
ld r5, HSTATE_KVM_VCORE(r13)
@@ -67,15 +67,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ori r8, r9, LPCR_HDICE
mtspr SPRN_LPCR, r8
isync
- andis. r0, r9, LPCR_LD@h
mfspr r8,SPRN_DEC
mftb r7
-BEGIN_FTR_SECTION
- /* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
- bne 32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
-32: mtspr SPRN_HDEC,r8
+ mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index be5742640780..4e9808a2c3a9 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -25,7 +25,6 @@
#include <asm/export.h>
#include <asm/tm.h>
#include <asm/opal.h>
-#include <asm/xive-regs.h>
#include <asm/thread_info.h>
#include <asm/asm-compat.h>
#include <asm/feature-fixups.h>
@@ -34,9 +33,7 @@
/* Sign-extend HDEC if not on POWER9 */
#define EXTEND_HDEC(reg) \
-BEGIN_FTR_SECTION; \
- extsw reg, reg; \
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
+ extsw reg, reg
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
@@ -56,8 +53,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_HFSCR (SFS-72)
#define STACK_SLOT_AMR (SFS-80)
#define STACK_SLOT_UAMOR (SFS-88)
-#define STACK_SLOT_DAWR1 (SFS-96)
-#define STACK_SLOT_DAWRX1 (SFS-104)
/*
* Call kvmppc_hv_entry in real mode.
@@ -347,7 +342,6 @@ kvm_secondary_got_guest:
LOAD_REG_ADDR(r6, decrementer_max)
ld r6, 0(r6)
mtspr SPRN_HDEC, r6
-BEGIN_FTR_SECTION
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
cmpdi r6, 0
@@ -359,7 +353,6 @@ BEGIN_FTR_SECTION
ld r0, KVM_SPLIT_LDBAR(r6)
mtspr SPRN_LDBAR, r0
isync
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
63:
/* Order load of vcpu after load of vcore */
lwsync
@@ -430,7 +423,6 @@ kvm_no_guest:
blr
53:
-BEGIN_FTR_SECTION
HMT_LOW
ld r5, HSTATE_KVM_VCORE(r13)
cmpdi r5, 0
@@ -445,14 +437,6 @@ BEGIN_FTR_SECTION
b kvm_unsplit_nap
60: HMT_MEDIUM
b kvm_secondary_got_guest
-FTR_SECTION_ELSE
- HMT_LOW
- ld r5, HSTATE_KVM_VCORE(r13)
- cmpdi r5, 0
- beq kvm_no_guest
- HMT_MEDIUM
- b kvm_secondary_got_guest
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
54: li r0, KVM_HWTHREAD_IN_KVM
stb r0, HSTATE_HWTHREAD_STATE(r13)
@@ -578,13 +562,11 @@ kvmppc_hv_entry:
bne 10f
lwz r7,KVM_LPID(r9)
-BEGIN_FTR_SECTION
ld r6,KVM_SDR1(r9)
li r0,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r0
ptesync
mtspr SPRN_SDR1,r6 /* switch to partition page table */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -664,16 +646,6 @@ kvmppc_got_guest:
mtspr SPRN_SPURR,r8
/* Save host values of some registers */
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_TIDR
- mfspr r6, SPRN_PSSCR
- mfspr r7, SPRN_PID
- std r5, STACK_SLOT_TID(r1)
- std r6, STACK_SLOT_PSSCR(r1)
- std r7, STACK_SLOT_PID(r1)
- mfspr r5, SPRN_HFSCR
- std r5, STACK_SLOT_HFSCR(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
mfspr r5, SPRN_CIABR
mfspr r6, SPRN_DAWR0
@@ -684,12 +656,6 @@ BEGIN_FTR_SECTION
std r7, STACK_SLOT_DAWRX0(r1)
std r8, STACK_SLOT_IAMR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- mfspr r6, SPRN_DAWR1
- mfspr r7, SPRN_DAWRX1
- std r6, STACK_SLOT_DAWR1(r1)
- std r7, STACK_SLOT_DAWRX1(r1)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
mfspr r5, SPRN_AMR
std r5, STACK_SLOT_AMR(r1)
@@ -707,13 +673,9 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -780,12 +742,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
ld r6, VCPU_DAWRX0(r4)
mtspr SPRN_DAWR0, r5
mtspr SPRN_DAWRX0, r6
-BEGIN_FTR_SECTION
- ld r5, VCPU_DAWR1(r4)
- ld r6, VCPU_DAWRX1(r4)
- mtspr SPRN_DAWR1, r5
- mtspr SPRN_DAWRX1, r6
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
1:
ld r7, VCPU_CIABR(r4)
ld r8, VCPU_TAR(r4)
@@ -803,7 +759,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
mtspr SPRN_BESCR, r6
mtspr SPRN_PID, r7
mtspr SPRN_WORT, r8
-BEGIN_FTR_SECTION
/* POWER8-only registers */
ld r5, VCPU_TCSCR(r4)
ld r6, VCPU_ACOP(r4)
@@ -814,18 +769,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_CSIGR, r7
mtspr SPRN_TACR, r8
nop
-FTR_SECTION_ELSE
- /* POWER9-only registers */
- ld r5, VCPU_TID(r4)
- ld r6, VCPU_PSSCR(r4)
- lbz r8, HSTATE_FAKE_SUSPEND(r13)
- oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
- rldimi r6, r8, PSSCR_FAKE_SUSPEND_LG, 63 - PSSCR_FAKE_SUSPEND_LG
- ld r7, VCPU_HFSCR(r4)
- mtspr SPRN_TIDR, r5
- mtspr SPRN_PSSCR, r6
- mtspr SPRN_HFSCR, r7
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
ld r5, VCPU_SPRG0(r4)
@@ -918,93 +861,9 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
bdnz 1b
9:
-#ifdef CONFIG_KVM_XICS
- /* We are entering the guest on that thread, push VCPU to XIVE */
- ld r11, VCPU_XIVE_SAVED_STATE(r4)
- li r9, TM_QW1_OS
- lwz r8, VCPU_XIVE_CAM_WORD(r4)
- cmpwi r8, 0
- beq no_xive
- li r7, TM_QW1_OS + TM_WORD2
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 2f
- ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
- cmpldi cr1, r10, 0
- beq cr1, no_xive
- eieio
- stdx r11,r9,r10
- stwx r8,r7,r10
- b 3f
-2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
- cmpldi cr1, r10, 0
- beq cr1, no_xive
- eieio
- stdcix r11,r9,r10
- stwcix r8,r7,r10
-3: li r9, 1
- stb r9, VCPU_XIVE_PUSHED(r4)
- eieio
-
- /*
- * We clear the irq_pending flag. There is a small chance of a
- * race vs. the escalation interrupt happening on another
- * processor setting it again, but the only consequence is to
- * cause a spurrious wakeup on the next H_CEDE which is not an
- * issue.
- */
- li r0,0
- stb r0, VCPU_IRQ_PENDING(r4)
-
- /*
- * In single escalation mode, if the escalation interrupt is
- * on, we mask it.
- */
- lbz r0, VCPU_XIVE_ESC_ON(r4)
- cmpwi cr1, r0,0
- beq cr1, 1f
- li r9, XIVE_ESB_SET_PQ_01
- beq 4f /* in real mode? */
- ld r10, VCPU_XIVE_ESC_VADDR(r4)
- ldx r0, r10, r9
- b 5f
-4: ld r10, VCPU_XIVE_ESC_RADDR(r4)
- ldcix r0, r10, r9
-5: sync
-
- /* We have a possible subtle race here: The escalation interrupt might
- * have fired and be on its way to the host queue while we mask it,
- * and if we unmask it early enough (re-cede right away), there is
- * a theorical possibility that it fires again, thus landing in the
- * target queue more than once which is a big no-no.
- *
- * Fortunately, solving this is rather easy. If the above load setting
- * PQ to 01 returns a previous value where P is set, then we know the
- * escalation interrupt is somewhere on its way to the host. In that
- * case we simply don't clear the xive_esc_on flag below. It will be
- * eventually cleared by the handler for the escalation interrupt.
- *
- * Then, when doing a cede, we check that flag again before re-enabling
- * the escalation interrupt, and if set, we abort the cede.
- */
- andi. r0, r0, XIVE_ESB_VAL_P
- bne- 1f
-
- /* Now P is 0, we can clear the flag */
- li r0, 0
- stb r0, VCPU_XIVE_ESC_ON(r4)
-1:
-no_xive:
-#endif /* CONFIG_KVM_XICS */
-
deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
/* Check if we can deliver an external or decrementer interrupt now */
ld r0, VCPU_PENDING_EXC(r4)
-BEGIN_FTR_SECTION
- /* On POWER9, also check for emulated doorbell interrupt */
- lbz r3, VCPU_DBELL_REQ(r4)
- or r0, r0, r3
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
cmpdi r0, 0
beq 71f
mr r3, r4
@@ -1077,12 +936,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_PPR, r0
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-/* Move canary into DSISR to check for later */
-BEGIN_FTR_SECTION
- li r0, 0x7fff
- mtspr SPRN_HDSISR, r0
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
ld r6, VCPU_KVM(r4)
lbz r7, KVM_SECURE_GUEST(r6)
cmpdi r7, 0
@@ -1298,14 +1151,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Hypervisor doorbell - exit only if host IPI flag set */
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
bne 3f
-BEGIN_FTR_SECTION
- PPC_MSGSYNC
- lwsync
- /* always exit if we're running a nested guest */
- ld r0, VCPU_NESTED(r9)
- cmpdi r0, 0
- bne guest_exit_cont
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
beq maybe_reenter_guest
@@ -1335,43 +1180,6 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
mr r4, r9
bl kvmhv_accumulate_time
#endif
-#ifdef CONFIG_KVM_XICS
- /* We are exiting, pull the VP from the XIVE */
- lbz r0, VCPU_XIVE_PUSHED(r9)
- cmpwi cr0, r0, 0
- beq 1f
- li r7, TM_SPC_PULL_OS_CTX
- li r6, TM_QW1_OS
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 2f
- ld r10, HSTATE_XIVE_TIMA_VIRT(r13)
- cmpldi cr0, r10, 0
- beq 1f
- /* First load to pull the context, we ignore the value */
- eieio
- lwzx r11, r7, r10
- /* Second load to recover the context state (Words 0 and 1) */
- ldx r11, r6, r10
- b 3f
-2: ld r10, HSTATE_XIVE_TIMA_PHYS(r13)
- cmpldi cr0, r10, 0
- beq 1f
- /* First load to pull the context, we ignore the value */
- eieio
- lwzcix r11, r7, r10
- /* Second load to recover the context state (Words 0 and 1) */
- ldcix r11, r6, r10
-3: std r11, VCPU_XIVE_SAVED_STATE(r9)
- /* Fixup some of the state for the next load */
- li r10, 0
- li r0, 0xff
- stb r10, VCPU_XIVE_PUSHED(r9)
- stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
- stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
- eieio
-1:
-#endif /* CONFIG_KVM_XICS */
/*
* Possibly flush the link stack here, before we do a blr in
@@ -1426,12 +1234,6 @@ guest_bypass:
ld r3, HSTATE_KVM_VCORE(r13)
mfspr r5,SPRN_DEC
mftb r6
- /* On P9, if the guest has large decr enabled, don't sign extend */
-BEGIN_FTR_SECTION
- ld r4, VCORE_LPCR(r3)
- andis. r4, r4, LPCR_LD@h
- bne 16f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r5,r5
16: add r5,r5,r6
/* r5 is a guest timebase value here, convert to host TB */
@@ -1505,7 +1307,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r6, VCPU_BESCR(r9)
stw r7, VCPU_GUEST_PID(r9)
std r8, VCPU_WORT(r9)
-BEGIN_FTR_SECTION
mfspr r5, SPRN_TCSCR
mfspr r6, SPRN_ACOP
mfspr r7, SPRN_CSIGR
@@ -1514,17 +1315,6 @@ BEGIN_FTR_SECTION
std r6, VCPU_ACOP(r9)
std r7, VCPU_CSIGR(r9)
std r8, VCPU_TACR(r9)
-FTR_SECTION_ELSE
- mfspr r5, SPRN_TIDR
- mfspr r6, SPRN_PSSCR
- std r5, VCPU_TID(r9)
- rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
- rotldi r6, r6, 60
- std r6, VCPU_PSSCR(r9)
- /* Restore host HFSCR value */
- ld r7, STACK_SLOT_HFSCR(r1)
- mtspr SPRN_HFSCR, r7
-ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/*
* Restore various registers to 0, where non-zero values
* set by the guest could disrupt the host.
@@ -1532,13 +1322,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
li r0, 0
mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
-BEGIN_FTR_SECTION
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
sldi r0, r0, 31
mtspr SPRN_MMCRS, r0
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
/* Save and restore AMR, IAMR and UAMOR before turning on the MMU */
ld r8, STACK_SLOT_IAMR(r1)
@@ -1595,13 +1383,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -1647,28 +1431,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_DAWR0, r6
mtspr SPRN_DAWRX0, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
-BEGIN_FTR_SECTION
- ld r6, STACK_SLOT_DAWR1(r1)
- ld r7, STACK_SLOT_DAWRX1(r1)
- mtspr SPRN_DAWR1, r6
- mtspr SPRN_DAWRX1, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1)
-BEGIN_FTR_SECTION
- ld r5, STACK_SLOT_TID(r1)
- ld r6, STACK_SLOT_PSSCR(r1)
- ld r7, STACK_SLOT_PID(r1)
- mtspr SPRN_TIDR, r5
- mtspr SPRN_PSSCR, r6
- mtspr SPRN_PID, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
- /*
- * cp_abort is required if the processor supports local copy-paste
- * to clear the copy buffer that was under control of the guest.
- */
-BEGIN_FTR_SECTION
- PPC_CP_ABORT
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
/*
* POWER7/POWER8 guest -> host partition switch code.
@@ -1705,13 +1467,11 @@ kvmhv_switch_to_host:
/* Primary thread switches back to host partition */
lwz r7,KVM_HOST_LPID(r4)
-BEGIN_FTR_SECTION
ld r6,KVM_HOST_SDR1(r4)
li r8,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r8
ptesync
mtspr SPRN_SDR1,r6 /* switch to host page table */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -1922,20 +1682,11 @@ kvmppc_tm_emul:
kvmppc_hdsi:
mfspr r4, SPRN_HDAR
mfspr r6, SPRN_HDSISR
-BEGIN_FTR_SECTION
- /* Look for DSISR canary. If we find it, retry instruction */
- cmpdi r6, 0x7fff
- beq 6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
/* HPTE not found fault or protection fault? */
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
beq 1f /* if not, send it to the guest */
andi. r0, r11, MSR_DR /* data relocation enabled? */
beq 3f
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
- b 4f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
clrrdi r0, r4, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
li r0, BOOK3S_INTERRUPT_DATA_SEGMENT
@@ -2012,10 +1763,6 @@ kvmppc_hisi:
beq 1f
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
beq 3f
-BEGIN_FTR_SECTION
- mfspr r5, SPRN_ASDR /* on POWER9, use ASDR to get VSID */
- b 4f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
clrrdi r0, r10, 28
PPC_SLBFEE_DOT(R5, R0) /* if so, look up SLB */
li r0, BOOK3S_INTERRUPT_INST_SEGMENT
@@ -2063,10 +1810,6 @@ hcall_try_real_mode:
andi. r0,r11,MSR_PR
/* sc 1 from userspace - reflect to guest syscall */
bne sc_1_fast_return
- /* sc 1 from nested guest - give it to L1 to handle */
- ld r0, VCPU_NESTED(r9)
- cmpdi r0, 0
- bne guest_exit_cont
clrrdi r3,r3,2
cmpldi r3,hcall_real_table_end - hcall_real_table
bge guest_exit_cont
@@ -2462,13 +2205,9 @@ _GLOBAL(kvmppc_h_cede) /* r3 = vcpu pointer, r11 = msr, r13 = paca */
bl kvmppc_save_fp
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -2488,15 +2227,8 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
-BEGIN_FTR_SECTION
- /* On P9 check whether the guest has large decrementer mode enabled */
- ld r6, HSTATE_KVM_VCORE(r13)
- ld r6, VCORE_LPCR(r6)
- andis. r6, r6, LPCR_LD@h
- bne 68f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r3, r3
-68: EXTEND_HDEC(r4)
+ EXTEND_HDEC(r4)
cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
@@ -2541,28 +2273,14 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
kvm_nap_sequence: /* desired LPCR value in r5 */
-BEGIN_FTR_SECTION
- /*
- * PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset)
- * enable state loss = 1 (allow SMT mode switch)
- * requested level = 0 (just stop dispatching)
- */
- lis r3, (PSSCR_EC | PSSCR_ESL)@h
/* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
li r4, LPCR_PECE_HVEE@higher
sldi r4, r4, 32
or r5, r5, r4
-FTR_SECTION_ELSE
- li r3, PNV_THREAD_NAP
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r5
isync
-BEGIN_FTR_SECTION
- bl isa300_idle_stop_mayloss
-FTR_SECTION_ELSE
bl isa206_idle_insn_mayloss
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
mfspr r0, SPRN_CTRLF
ori r0, r0, 1
@@ -2581,10 +2299,8 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
beq kvm_end_cede
cmpwi r0, NAPPING_NOVCPU
beq kvm_novcpu_wakeup
-BEGIN_FTR_SECTION
cmpwi r0, NAPPING_UNSPLIT
beq kvm_unsplit_wakeup
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
twi 31,0,0 /* Nap state must not be zero */
33: mr r4, r3
@@ -2604,13 +2320,9 @@ kvm_end_cede:
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Branch around the call if both CPU_FTR_TM and
- * CPU_FTR_P9_TM_HV_ASSIST are off.
- */
BEGIN_FTR_SECTION
b 91f
-END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0)
+END_FTR_SECTION_IFCLR(CPU_FTR_TM)
/*
* NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS (but not CR)
*/
@@ -2700,47 +2412,7 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
-#ifdef CONFIG_KVM_XICS
- /* are we using XIVE with single escalation? */
- ld r10, VCPU_XIVE_ESC_VADDR(r9)
- cmpdi r10, 0
- beq 3f
- li r6, XIVE_ESB_SET_PQ_00
- /*
- * If we still have a pending escalation, abort the cede,
- * and we must set PQ to 10 rather than 00 so that we don't
- * potentially end up with two entries for the escalation
- * interrupt in the XIVE interrupt queue. In that case
- * we also don't want to set xive_esc_on to 1 here in
- * case we race with xive_esc_irq().
- */
- lbz r5, VCPU_XIVE_ESC_ON(r9)
- cmpwi r5, 0
- beq 4f
- li r0, 0
- stb r0, VCPU_CEDED(r9)
- /*
- * The escalation interrupts are special as we don't EOI them.
- * There is no need to use the load-after-store ordering offset
- * to set PQ to 10 as we won't use StoreEOI.
- */
- li r6, XIVE_ESB_SET_PQ_10
- b 5f
-4: li r0, 1
- stb r0, VCPU_XIVE_ESC_ON(r9)
- /* make sure store to xive_esc_on is seen before xive_esc_irq runs */
- sync
-5: /* Enable XIVE escalation */
- mfmsr r0
- andi. r0, r0, MSR_DR /* in real mode? */
- beq 1f
- ldx r0, r10, r6
- b 2f
-1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
- ldcix r0, r10, r6
-2: sync
-#endif /* CONFIG_KVM_XICS */
-3: b guest_exit_cont
+ b guest_exit_cont
/* Try to do machine check recovery in real mode */
machine_check_realmode:
@@ -2817,10 +2489,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
PPC_MSGCLR(6)
/* see if it's a host IPI */
li r3, 1
-BEGIN_FTR_SECTION
- PPC_MSGSYNC
- lwsync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
lbz r0, HSTATE_HOST_IPI(r13)
cmpwi r0, 0
bnelr
@@ -3129,70 +2797,12 @@ kvmppc_bad_host_intr:
std r3, STACK_FRAME_OVERHEAD-16(r1)
/*
- * On POWER9 do a minimal restore of the MMU and call C code,
- * which will print a message and panic.
* XXX On POWER7 and POWER8, we just spin here since we don't
* know what the other threads are doing (and we don't want to
* coordinate with them) - but at least we now have register state
* in memory that we might be able to look at from another CPU.
*/
-BEGIN_FTR_SECTION
b .
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
- ld r9, HSTATE_KVM_VCPU(r13)
- ld r10, VCPU_KVM(r9)
-
- li r0, 0
- mtspr SPRN_AMR, r0
- mtspr SPRN_IAMR, r0
- mtspr SPRN_CIABR, r0
- mtspr SPRN_DAWRX0, r0
-BEGIN_FTR_SECTION
- mtspr SPRN_DAWRX1, r0
-END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
-
- /* Clear guest SLB. */
- slbmte r0, r0
- PPC_SLBIA(6)
- ptesync
-
- /* load host SLB entries */
- ld r8, PACA_SLBSHADOWPTR(r13)
- .rept SLB_NUM_BOLTED
- li r3, SLBSHADOW_SAVEAREA
- LDX_BE r5, r8, r3
- addi r3, r3, 8
- LDX_BE r6, r8, r3
- andis. r7, r5, SLB_ESID_V@h
- beq 3f
- slbmte r6, r5
-3: addi r8, r8, 16
- .endr
-
- lwz r7, KVM_HOST_LPID(r10)
- mtspr SPRN_LPID, r7
- mtspr SPRN_PID, r0
- ld r8, KVM_HOST_LPCR(r10)
- mtspr SPRN_LPCR, r8
- isync
- li r0, KVM_GUEST_MODE_NONE
- stb r0, HSTATE_IN_GUEST(r13)
-
- /*
- * Turn on the MMU and jump to C code
- */
- bcl 20, 31, .+4
-5: mflr r3
- addi r3, r3, 9f - 5b
- li r4, -1
- rldimi r3, r4, 62, 0 /* ensure 0xc000000000000000 bits are set */
- ld r4, PACAKMSR(r13)
- mtspr SPRN_SRR0, r3
- mtspr SPRN_SRR1, r4
- RFI_TO_KERNEL
-9: addi r3, r1, STACK_FRAME_OVERHEAD
- bl kvmppc_bad_interrupt
- b 9b
/*
* This mimics the MSR transition on IRQ delivery. The new guest MSR is taken
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 999997d9e9a9..528a7e0cf83a 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -604,7 +604,7 @@ struct p9_sprs {
u64 uamor;
};
-static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power9_idle_stop(unsigned long psscr)
{
int cpu = raw_smp_processor_id();
int first = cpu_first_thread_sibling(cpu);
@@ -620,8 +620,6 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
/* EC=ESL=0 case */
- BUG_ON(!mmu_on);
-
/*
* Wake synchronously. SRESET via xscom may still cause
* a 0x100 powersave wakeup with SRR1 reason!
@@ -803,8 +801,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
__slb_restore_bolted_realmode();
out:
- if (mmu_on)
- mtmsr(MSR_KERNEL);
+ mtmsr(MSR_KERNEL);
return srr1;
}
@@ -895,7 +892,7 @@ struct p10_sprs {
*/
};
-static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
+static unsigned long power10_idle_stop(unsigned long psscr)
{
int cpu = raw_smp_processor_id();
int first = cpu_first_thread_sibling(cpu);
@@ -909,8 +906,6 @@ static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
if (!(psscr & (PSSCR_EC|PSSCR_ESL))) {
/* EC=ESL=0 case */
- BUG_ON(!mmu_on);
-
/*
* Wake synchronously. SRESET via xscom may still cause
* a 0x100 powersave wakeup with SRR1 reason!
@@ -991,8 +986,7 @@ static unsigned long power10_idle_stop(unsigned long psscr, bool mmu_on)
__slb_restore_bolted_realmode();
out:
- if (mmu_on)
- mtmsr(MSR_KERNEL);
+ mtmsr(MSR_KERNEL);
return srr1;
}
@@ -1002,40 +996,10 @@ static unsigned long arch300_offline_stop(unsigned long psscr)
{
unsigned long srr1;
-#ifndef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- __ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, true);
+ srr1 = power10_idle_stop(psscr);
else
- srr1 = power9_idle_stop(psscr, true);
- __ppc64_runlatch_on();
-#else
- /*
- * Tell KVM we're entering idle.
- * This does not have to be done in real mode because the P9 MMU
- * is independent per-thread. Some steppings share radix/hash mode
- * between threads, but in that case KVM has a barrier sync in real
- * mode before and after switching between radix and hash.
- *
- * kvm_start_guest must still be called in real mode though, hence
- * the false argument.
- */
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_IDLE;
-
- __ppc64_runlatch_off();
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, false);
- else
- srr1 = power9_idle_stop(psscr, false);
- __ppc64_runlatch_on();
-
- local_paca->kvm_hstate.hwthread_state = KVM_HWTHREAD_IN_KERNEL;
- /* Order setting hwthread_state vs. testing hwthread_req */
- smp_mb();
- if (local_paca->kvm_hstate.hwthread_req)
- srr1 = idle_kvm_start_guest(srr1);
- mtmsr(MSR_KERNEL);
-#endif
+ srr1 = power9_idle_stop(psscr);
return srr1;
}
@@ -1055,9 +1019,9 @@ void arch300_idle_type(unsigned long stop_psscr_val,
__ppc64_runlatch_off();
if (cpu_has_feature(CPU_FTR_ARCH_31))
- srr1 = power10_idle_stop(psscr, true);
+ srr1 = power10_idle_stop(psscr);
else
- srr1 = power9_idle_stop(psscr, true);
+ srr1 = power9_idle_stop(psscr);
__ppc64_runlatch_on();
fini_irq_for_idle_irqsoff();
--
2.23.0
^ permalink raw reply related
* [PATCH v2 36/37] KVM: PPC: Book3S HV P9: implement hash host / hash guest support
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 2 +-
arch/powerpc/kvm/book3s_hv_interrupt.c | 75 ++++++++++++++++----------
2 files changed, 47 insertions(+), 30 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 97320531f37c..10d5c7ea80ca 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4403,7 +4403,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
* The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
* path, which also handles hash and dependent threads mode.
*/
- if (radix_enabled())
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
vcpu->arch.vcore->lpcr);
else
diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
index d79c6f4f330c..af4772755e5d 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupt.c
+++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
@@ -140,12 +140,51 @@ static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64
}
-static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
+static void switch_mmu_to_host(struct kvm *kvm, u32 pid)
{
mtspr(SPRN_PID, pid);
mtspr(SPRN_LPID, kvm->arch.host_lpid);
mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
isync();
+
+ /* XXX: could save and restore host SLBs to reduce SLB faults */
+ if (!radix_enabled())
+ slb_restore_bolted_realmode();
+}
+
+static void save_host_mmu(struct kvm *kvm)
+{
+ if (!radix_enabled()) {
+ mtslb(0, 0, 0);
+ slb_invalidate(6);
+ }
+}
+
+static void save_guest_mmu(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+ if (kvm_is_radix(kvm)) {
+ radix_clear_slb();
+ } else {
+ int i;
+ int nr = 0;
+
+ /*
+ * This must run before switching to host (radix host can't
+ * access all SLBs).
+ */
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
+ u64 slbee, slbev;
+ mfslb(i, &slbee, &slbev);
+ if (slbee & SLB_ESID_V) {
+ vcpu->arch.slb[nr].orige = slbee | i;
+ vcpu->arch.slb[nr].origv = slbev;
+ nr++;
+ }
+ }
+ vcpu->arch.slb_max = nr;
+ mtslb(0, 0, 0);
+ slb_invalidate(6);
+ }
}
int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
@@ -252,15 +291,16 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
mtspr(SPRN_AMOR, ~0UL);
+ if (!radix_enabled() || !kvm_is_radix(kvm) || cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+
+ save_host_mmu(kvm);
if (kvm_is_radix(kvm)) {
- if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
- __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
__mtmsrd(0, 1); /* clear RI */
} else {
- __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
}
@@ -437,31 +477,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
/* HDEC must be at least as large as DEC, so decrementer_max fits */
mtspr(SPRN_HDEC, decrementer_max);
- if (kvm_is_radix(kvm)) {
- radix_clear_slb();
- } else {
- int i;
- int nr = 0;
-
- /*
- * This must run before switching to host (radix host can't
- * access all SLBs).
- */
- for (i = 0; i < vcpu->arch.slb_nr; i++) {
- u64 slbee, slbev;
- mfslb(i, &slbee, &slbev);
- if (slbee & SLB_ESID_V) {
- vcpu->arch.slb[nr].orige = slbee | i;
- vcpu->arch.slb[nr].origv = slbev;
- nr++;
- }
- }
- vcpu->arch.slb_max = nr;
- mtslb(0, 0, 0);
- slb_invalidate(6);
- }
-
- switch_mmu_to_host_radix(kvm, host_pidr);
+ save_guest_mmu(kvm, vcpu);
+ switch_mmu_to_host(kvm, host_pidr);
/*
* If we are in real mode, don't switch MMU on until the MMU is
--
2.23.0
^ permalink raw reply related
* [PATCH v2 35/37] KVM: PPC: Book3S HV P9: implement hash guest support
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 20 ++--
arch/powerpc/kvm/book3s_hv_interrupt.c | 123 +++++++++++++++++-------
arch/powerpc/kvm/book3s_hv_rm_mmu.c | 4 +
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 14 +--
4 files changed, 109 insertions(+), 52 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1bbc46f2cfbf..97320531f37c 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3765,7 +3765,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
}
kvmppc_xive_pull_vcpu(vcpu);
- vcpu->arch.slb_max = 0;
+ if (kvm_is_radix(vcpu->kvm))
+ vcpu->arch.slb_max = 0;
}
dec = mfspr(SPRN_DEC);
@@ -3998,7 +3999,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
/*
* This never fails for a radix guest, as none of the operations it does
* for a radix guest can fail or have a way to report failure.
- * kvmhv_run_single_vcpu() relies on this fact.
*/
static int kvmhv_setup_mmu(struct kvm_vcpu *vcpu)
{
@@ -4177,8 +4177,15 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
vc->runner = vcpu;
/* See if the MMU is ready to go */
- if (!kvm->arch.mmu_ready)
- kvmhv_setup_mmu(vcpu);
+ if (!kvm->arch.mmu_ready) {
+ r = kvmhv_setup_mmu(vcpu);
+ if (r) {
+ run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ run->fail_entry.hardware_entry_failure_reason = 0;
+ vcpu->arch.ret = r;
+ return r;
+ }
+ }
if (need_resched())
cond_resched();
@@ -4191,7 +4198,8 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
preempt_disable();
pcpu = smp_processor_id();
vc->pcpu = pcpu;
- kvmppc_prepare_radix_vcpu(vcpu, pcpu);
+ if (kvm_is_radix(kvm))
+ kvmppc_prepare_radix_vcpu(vcpu, pcpu);
local_irq_disable();
hard_irq_disable();
@@ -4395,7 +4403,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
* The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
* path, which also handles hash and dependent threads mode.
*/
- if (kvm_is_radix(kvm))
+ if (radix_enabled())
r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
vcpu->arch.vcore->lpcr);
else
diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
index 9784da3f8565..d79c6f4f330c 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupt.c
+++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
@@ -55,44 +55,25 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator
#define accumulate_time(vcpu, next) do {} while (0)
#endif
-static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
-{
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
- struct kvm_nested_guest *nested = vcpu->arch.nested;
- u32 lpid;
-
- lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
-
- mtspr(SPRN_LPID, lpid);
- mtspr(SPRN_LPCR, lpcr);
- mtspr(SPRN_PID, vcpu->arch.pid);
- isync();
-
- /* TLBIEL must have LPIDR set, so set guest LPID before flushing. */
- kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
-}
-
-static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
-{
- mtspr(SPRN_PID, pid);
- mtspr(SPRN_LPID, kvm->arch.host_lpid);
- mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
- isync();
-}
-
static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
{
asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx));
}
+static inline void __mtslb(u64 slbee, u64 slbev)
+{
+ asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
+}
+
static inline void mtslb(unsigned int idx, u64 slbee, u64 slbev)
{
BUG_ON((slbee & 0xfff) != idx);
- asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
+ __mtslb(slbee, slbev);
}
+
static inline void slb_invalidate(unsigned int ih)
{
asm volatile("slbia %0" :: "i"(ih));
@@ -119,6 +100,54 @@ static void radix_clear_slb(void)
}
}
+static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ u32 lpid;
+
+ lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+
+ mtspr(SPRN_LPID, lpid);
+ mtspr(SPRN_LPCR, lpcr);
+ mtspr(SPRN_PID, vcpu->arch.pid);
+ isync();
+
+ /* TLBIEL must have LPIDR set, so set guest LPID before flushing. */
+ kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
+}
+
+static void switch_mmu_to_guest_hpt(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ u32 lpid;
+ int i;
+
+ BUG_ON(nested);
+
+ lpid = kvm->arch.lpid;
+
+ mtspr(SPRN_LPID, lpid);
+ mtspr(SPRN_LPCR, lpcr);
+ mtspr(SPRN_PID, vcpu->arch.pid);
+
+ for (i = 0; i < vcpu->arch.slb_max; i++)
+ __mtslb(vcpu->arch.slb[i].orige, vcpu->arch.slb[i].origv);
+
+ isync();
+
+ /* XXX: TLBIEL not virtualised for HPT guests */
+}
+
+
+static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
+{
+ mtspr(SPRN_PID, pid);
+ mtspr(SPRN_LPID, kvm->arch.host_lpid);
+ mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
+ isync();
+}
+
int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
{
struct kvm *kvm = vcpu->kvm;
@@ -223,10 +252,17 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
mtspr(SPRN_AMOR, ~0UL);
- if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
- __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+ if (kvm_is_radix(kvm)) {
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+ switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+ if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(0, 1); /* clear RI */
- switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+ } else {
+ __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+ switch_mmu_to_guest_hpt(kvm, vcpu, lpcr);
+ }
/*
* P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
@@ -234,9 +270,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
*/
mtspr(SPRN_HDEC, hdec);
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
- __mtmsrd(0, 1); /* clear RI */
-
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
@@ -340,8 +373,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
#endif
}
- radix_clear_slb();
-
accumulate_time(vcpu, &vcpu->arch.rm_exit);
/* Advance host PURR/SPURR by the amount used by guest */
@@ -406,6 +437,30 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
/* HDEC must be at least as large as DEC, so decrementer_max fits */
mtspr(SPRN_HDEC, decrementer_max);
+ if (kvm_is_radix(kvm)) {
+ radix_clear_slb();
+ } else {
+ int i;
+ int nr = 0;
+
+ /*
+ * This must run before switching to host (radix host can't
+ * access all SLBs).
+ */
+ for (i = 0; i < vcpu->arch.slb_nr; i++) {
+ u64 slbee, slbev;
+ mfslb(i, &slbee, &slbev);
+ if (slbee & SLB_ESID_V) {
+ vcpu->arch.slb[nr].orige = slbee | i;
+ vcpu->arch.slb[nr].origv = slbev;
+ nr++;
+ }
+ }
+ vcpu->arch.slb_max = nr;
+ mtslb(0, 0, 0);
+ slb_invalidate(6);
+ }
+
switch_mmu_to_host_radix(kvm, host_pidr);
/*
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 7af7c70f1468..b5532b85c7fd 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -57,6 +57,10 @@ static int global_invalidates(struct kvm *kvm)
else
global = 1;
+ /* LPID has been switched to host if in virt mode so can't do local */
+ if (!global && (mfmsr() & (MSR_IR|MSR_DR)))
+ global = 1;
+
if (!global) {
/* any other core might now have stale TLB entries... */
smp_wmb();
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index a8ce68eed13e..be5742640780 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -899,14 +899,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
- /* For hash guest, clear out and reload the SLB */
-BEGIN_MMU_FTR_SECTION
- /* Radix host won't have populated the SLB, so no need to clear */
+ /* Clear out and reload the SLB */
li r6, 0
slbmte r6, r6
PPC_SLBIA(6)
ptesync
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
/* Load up guest SLB entries (N.B. slb_max will be 0 for radix) */
lwz r5,VCPU_SLB_MAX(r4)
@@ -1408,9 +1405,6 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
stw r5,VCPU_SLB_MAX(r9)
/* load host SLB entries */
-BEGIN_MMU_FTR_SECTION
- b guest_bypass
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
ld r8,PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
@@ -3162,10 +3156,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
PPC_SLBIA(6)
ptesync
-BEGIN_MMU_FTR_SECTION
- b 4f
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
-
/* load host SLB entries */
ld r8, PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
@@ -3179,7 +3169,7 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
3: addi r8, r8, 16
.endr
-4: lwz r7, KVM_HOST_LPID(r10)
+ lwz r7, KVM_HOST_LPID(r10)
mtspr SPRN_LPID, r7
mtspr SPRN_PID, r0
ld r8, KVM_HOST_LPCR(r10)
--
2.23.0
^ permalink raw reply related
* [PATCH v2 34/37] KVM: PPC: Book3S HV: add virtual mode handlers for HPT hcalls and page faults
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
In order to support hash guests in the P9 path (which does not do real
mode hcalls or page fault handling), the hash-specific hcalls and page
fault interrupts that are handled in real mode need virt mode handlers.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 118 +++++++++++++++++++++++++++++++++--
1 file changed, 113 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 9d2fa21201c1..1bbc46f2cfbf 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -935,6 +935,52 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
return RESUME_HOST;
switch (req) {
+ case H_REMOVE:
+ ret = kvmppc_h_remove(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_ENTER:
+ ret = kvmppc_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6),
+ kvmppc_get_gpr(vcpu, 7));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_READ:
+ ret = kvmppc_h_read(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_MOD:
+ ret = kvmppc_h_clear_mod(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_CLEAR_REF:
+ ret = kvmppc_h_clear_ref(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_PROTECT:
+ ret = kvmppc_h_protect(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5),
+ kvmppc_get_gpr(vcpu, 6));
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+ case H_BULK_REMOVE:
+ ret = kvmppc_h_bulk_remove(vcpu);
+ if (ret == H_TOO_HARD)
+ return RESUME_HOST;
+ break;
+
case H_CEDE:
break;
case H_PROD:
@@ -1134,6 +1180,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
default:
return RESUME_HOST;
}
+ WARN_ON_ONCE(ret == H_TOO_HARD);
kvmppc_set_gpr(vcpu, 3, ret);
vcpu->arch.hcall_needed = 0;
return RESUME_GUEST;
@@ -1420,19 +1467,80 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
* host page has been paged out. Any other HDSI/HISI interrupts
* have been handled already.
*/
- case BOOK3S_INTERRUPT_H_DATA_STORAGE:
- r = RESUME_PAGE_FAULT;
- if (vcpu->arch.fault_dsisr == HDSISR_CANARY)
+ case BOOK3S_INTERRUPT_H_DATA_STORAGE: {
+ unsigned long vsid;
+ long err;
+
+ if (vcpu->arch.fault_dsisr == HDSISR_CANARY) {
r = RESUME_GUEST; /* Just retry if it's the canary */
+ break;
+ }
+
+ if (kvm_is_radix(vcpu->kvm)) {
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & (DSISR_NOHPTE | DSISR_PROTFAULT))) {
+ kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dar, vcpu->arch.fault_dsisr);
+ r = RESUME_GUEST;
+ break;
+ }
+ if (!(vcpu->arch.shregs.msr & MSR_DR)) {
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ } else {
+ vsid = vcpu->arch.fault_gpa;
+ }
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, true);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1 || err == -2) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_data_storage(vcpu, vcpu->arch.fault_dar, err);
+ r = RESUME_GUEST;
+ }
break;
- case BOOK3S_INTERRUPT_H_INST_STORAGE:
+ }
+ case BOOK3S_INTERRUPT_H_INST_STORAGE: {
+ unsigned long vsid;
+ long err;
+
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
vcpu->arch.fault_dsisr = vcpu->arch.shregs.msr &
DSISR_SRR1_MATCH_64S;
if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE)
vcpu->arch.fault_dsisr |= DSISR_ISSTORE;
- r = RESUME_PAGE_FAULT;
+ if (kvm_is_radix(vcpu->kvm)) {
+ r = RESUME_PAGE_FAULT;
+ break;
+ }
+
+ if (!(vcpu->arch.fault_dsisr & SRR1_ISI_NOPT)) {
+ /* XXX: clear DSISR_ISSTORE? */
+ kvmppc_core_queue_inst_storage(vcpu, vcpu->arch.fault_dsisr);
+ r = RESUME_GUEST;
+ break;
+ }
+ if (!(vcpu->arch.shregs.msr & MSR_DR)) {
+ vsid = vcpu->kvm->arch.vrma_slb_v;
+ } else {
+ vsid = vcpu->arch.fault_gpa;
+ }
+ err = kvmppc_hpte_hv_fault(vcpu, vcpu->arch.fault_dar,
+ vsid, vcpu->arch.fault_dsisr, false);
+ if (err == 0) {
+ r = RESUME_GUEST;
+ } else if (err == -1) {
+ r = RESUME_PAGE_FAULT;
+ } else {
+ kvmppc_core_queue_inst_storage(vcpu, err);
+ r = RESUME_GUEST;
+ }
break;
+ }
+
/*
* This occurs if the guest executes an illegal instruction.
* If the guest debug is disabled, generate a program interrupt
--
2.23.0
^ permalink raw reply related
* [PATCH v2 33/37] KVM: PPC: Book3S HV: small pseries_do_hcall cleanup
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
Use a local kvm variable in place of the repeated vcpu->kvm dereferences.
No functional change intended.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 29 +++++++++++++++--------------
1 file changed, 15 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1f27187ff1e7..9d2fa21201c1 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -923,6 +923,7 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
int yield_count;
@@ -938,7 +939,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
case H_PROD:
target = kvmppc_get_gpr(vcpu, 4);
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
@@ -952,7 +953,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
target = kvmppc_get_gpr(vcpu, 4);
if (target == -1)
break;
- tvcpu = kvmppc_find_vcpu(vcpu->kvm, target);
+ tvcpu = kvmppc_find_vcpu(kvm, target);
if (!tvcpu) {
ret = H_PARAMETER;
break;
@@ -968,12 +969,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
kvmppc_get_gpr(vcpu, 6));
break;
case H_RTAS:
- if (list_empty(&vcpu->kvm->arch.rtas_tokens))
+ if (list_empty(&kvm->arch.rtas_tokens))
return RESUME_HOST;
- idx = srcu_read_lock(&vcpu->kvm->srcu);
+ idx = srcu_read_lock(&kvm->srcu);
rc = kvmppc_rtas_hcall(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ srcu_read_unlock(&kvm->srcu, idx);
if (rc == -ENOENT)
return RESUME_HOST;
@@ -1060,12 +1061,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SET_PARTITION_TABLE:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_set_partition_table(vcpu);
break;
case H_ENTER_NESTED:
ret = H_FUNCTION;
- if (!nesting_enabled(vcpu->kvm))
+ if (!nesting_enabled(kvm))
break;
ret = kvmhv_enter_nested_guest(vcpu);
if (ret == H_INTERRUPT) {
@@ -1080,12 +1081,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
break;
case H_TLB_INVALIDATE:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_do_nested_tlbie(vcpu);
break;
case H_COPY_TOFROM_GUEST:
ret = H_FUNCTION;
- if (nesting_enabled(vcpu->kvm))
+ if (nesting_enabled(kvm))
ret = kvmhv_copy_tofrom_guest_nested(vcpu);
break;
case H_PAGE_INIT:
@@ -1096,7 +1097,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_IN:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_page_in(vcpu->kvm,
+ ret = kvmppc_h_svm_page_in(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
@@ -1104,7 +1105,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_PAGE_OUT:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_page_out(vcpu->kvm,
+ ret = kvmppc_h_svm_page_out(kvm,
kvmppc_get_gpr(vcpu, 4),
kvmppc_get_gpr(vcpu, 5),
kvmppc_get_gpr(vcpu, 6));
@@ -1112,12 +1113,12 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
case H_SVM_INIT_START:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_init_start(vcpu->kvm);
+ ret = kvmppc_h_svm_init_start(kvm);
break;
case H_SVM_INIT_DONE:
ret = H_UNSUPPORTED;
if (kvmppc_get_srr1(vcpu) & MSR_S)
- ret = kvmppc_h_svm_init_done(vcpu->kvm);
+ ret = kvmppc_h_svm_init_done(kvm);
break;
case H_SVM_INIT_ABORT:
/*
@@ -1127,7 +1128,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
* Instead the kvm->arch.secure_guest flag is checked inside
* kvmppc_h_svm_init_abort().
*/
- ret = kvmppc_h_svm_init_abort(vcpu->kvm);
+ ret = kvmppc_h_svm_init_abort(kvm);
break;
default:
--
2.23.0
^ permalink raw reply related
* [PATCH v2 32/37] KVM: PPC: Book3S HV: Remove radix guest support from P7/8 path
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
The P9 path will run all supported radix guest combinations now, so
remove support from the old path.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 65 ++-----------------------
1 file changed, 3 insertions(+), 62 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 61f71a7df238..a8ce68eed13e 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -899,11 +899,6 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
- ld r6, VCPU_KVM(r4)
- lbz r0, KVM_RADIX(r6)
- cmpwi r0, 0
- bne 9f
-
/* For hash guest, clear out and reload the SLB */
BEGIN_MMU_FTR_SECTION
/* Radix host won't have populated the SLB, so no need to clear */
@@ -1389,11 +1384,7 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
patch_site 1b patch__call_kvm_flush_link_stack
/* For hash guest, read the guest SLB and save it away */
- ld r5, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r5)
li r5, 0
- cmpwi r0, 0
- bne 0f /* for radix, save 0 entries */
lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */
mtctr r0
li r6,0
@@ -1432,23 +1423,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
slbmte r6,r5
1: addi r8,r8,16
.endr
- b guest_bypass
-
-0: /*
- * Malicious or buggy radix guests may have inserted SLB entries
- * (only 0..3 because radix always runs with UPRT=1), so these must
- * be cleared here to avoid side-channels. slbmte is used rather
- * than slbia, as it won't clear cached translations.
- */
- li r0,0
- stw r0,VCPU_SLB_MAX(r9)
- slbmte r0,r0
- li r4,1
- slbmte r0,r4
- li r4,2
- slbmte r0,r4
- li r4,3
- slbmte r0,r4
guest_bypass:
stw r12, STACK_SLOT_TRAP(r1)
@@ -1694,24 +1668,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_PID, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-#ifdef CONFIG_PPC_RADIX_MMU
- /*
- * Are we running hash or radix ?
- */
- ld r5, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r5)
- cmpwi cr2, r0, 0
- beq cr2, 2f
-
- /*
- * Radix: do eieio; tlbsync; ptesync sequence in case we
- * interrupted the guest between a tlbie and a ptesync.
- */
- eieio
- tlbsync
- ptesync
-#endif /* CONFIG_PPC_RADIX_MMU */
-
/*
* cp_abort is required if the processor supports local copy-paste
* to clear the copy buffer that was under control of the guest.
@@ -1970,8 +1926,6 @@ kvmppc_tm_emul:
* reflect the HDSI to the guest as a DSI.
*/
kvmppc_hdsi:
- ld r3, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r3)
mfspr r4, SPRN_HDAR
mfspr r6, SPRN_HDSISR
BEGIN_FTR_SECTION
@@ -1979,8 +1933,6 @@ BEGIN_FTR_SECTION
cmpdi r6, 0x7fff
beq 6f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
- cmpwi r0, 0
- bne .Lradix_hdsi /* on radix, just save DAR/DSISR/ASDR */
/* HPTE not found fault or protection fault? */
andis. r0, r6, (DSISR_NOHPTE | DSISR_PROTFAULT)@h
beq 1f /* if not, send it to the guest */
@@ -2057,23 +2009,11 @@ fast_interrupt_c_return:
stb r0, HSTATE_IN_GUEST(r13)
b guest_exit_cont
-.Lradix_hdsi:
- std r4, VCPU_FAULT_DAR(r9)
- stw r6, VCPU_FAULT_DSISR(r9)
-.Lradix_hisi:
- mfspr r5, SPRN_ASDR
- std r5, VCPU_FAULT_GPA(r9)
- b guest_exit_cont
-
/*
* Similarly for an HISI, reflect it to the guest as an ISI unless
* it is an HPTE not found fault for a page that we have paged out.
*/
kvmppc_hisi:
- ld r3, VCPU_KVM(r9)
- lbz r0, KVM_RADIX(r3)
- cmpwi r0, 0
- bne .Lradix_hisi /* for radix, just save ASDR */
andis. r0, r11, SRR1_ISI_NOPT@h
beq 1f
andi. r0, r11, MSR_IR /* instruction relocation enabled? */
@@ -3217,15 +3157,16 @@ BEGIN_FTR_SECTION
mtspr SPRN_DAWRX1, r0
END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
- /* Clear hash and radix guest SLB. */
+ /* Clear guest SLB. */
slbmte r0, r0
PPC_SLBIA(6)
+ ptesync
BEGIN_MMU_FTR_SECTION
b 4f
END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
- ptesync
+ /* load host SLB entries */
ld r8, PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
li r3, SLBSHADOW_SAVEAREA
--
2.23.0
^ permalink raw reply related
* [PATCH v2 31/37] KVM: PPC: Book3S HV: Remove support for dependent threads mode on P9
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
Radix guest support will be removed from the P7/8 path, so disallow
dependent-threads mode on P9 and remove the indep_threads_mode module
parameter that selected it.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/kvm_host.h | 1 -
arch/powerpc/kvm/book3s_hv.c | 27 +++++----------------------
2 files changed, 5 insertions(+), 23 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 05fb00d37609..dd017dfa4e65 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -304,7 +304,6 @@ struct kvm_arch {
u8 fwnmi_enabled;
u8 secure_guest;
u8 svm_enabled;
- bool threads_indep;
bool nested_enable;
bool dawr1_enabled;
pgd_t *pgtable;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index c3064075f1d7..1f27187ff1e7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -103,13 +103,9 @@ static int target_smt_mode;
module_param(target_smt_mode, int, 0644);
MODULE_PARM_DESC(target_smt_mode, "Target threads per core (0 = max)");
-static bool indep_threads_mode = true;
-module_param(indep_threads_mode, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(indep_threads_mode, "Independent-threads mode (only on POWER9)");
-
static bool one_vm_per_core;
module_param(one_vm_per_core, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires indep_threads_mode=N)");
+MODULE_PARM_DESC(one_vm_per_core, "Only run vCPUs from the same VM on a core (requires POWER8 or older)");
#ifdef CONFIG_KVM_XICS
static const struct kernel_param_ops module_param_ops = {
@@ -2201,7 +2197,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
*/
static int threads_per_vcore(struct kvm *kvm)
{
- if (kvm->arch.threads_indep)
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
return 1;
return threads_per_subcore;
}
@@ -4290,7 +4286,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
* The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
* path, which also handles hash and dependent threads mode.
*/
- if (kvm->arch.threads_indep && kvm_is_radix(kvm))
+ if (kvm_is_radix(kvm))
r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
vcpu->arch.vcore->lpcr);
else
@@ -4910,21 +4906,8 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/*
* Track that we now have a HV mode VM active. This blocks secondary
* CPU threads from coming online.
- * On POWER9, we only need to do this if the "indep_threads_mode"
- * module parameter has been set to N.
*/
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
- pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
- kvm->arch.threads_indep = true;
- } else if (!indep_threads_mode && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
- pr_warn("KVM: Ignoring indep_threads_mode=N on pre-DD2.2 POWER9\n");
- kvm->arch.threads_indep = true;
- } else {
- kvm->arch.threads_indep = indep_threads_mode;
- }
- }
- if (!kvm->arch.threads_indep)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_activated();
/*
@@ -4965,7 +4948,7 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
{
debugfs_remove_recursive(kvm->arch.debugfs_dir);
- if (!kvm->arch.threads_indep)
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
kvm_hv_vm_deactivated();
kvmppc_free_vcores(kvm);
--
2.23.0
^ permalink raw reply related
* [PATCH v2 30/37] KVM: PPC: Book3S HV: Implement radix prefetch workaround by disabling MMU
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
Rather than partition the guest PID space and catch and flush a rogue
guest, instead work around this issue by ensuring the MMU is always
disabled in HV mode while the guest MMU context is switched in.
This may be a bit less efficient, but it is a lot less complicated and
allows the P9 path to trivially implement the workaround too. Newer CPUs
are not subject to this issue.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/mmu_context.h | 6 ----
arch/powerpc/kvm/book3s_hv.c | 10 ++++--
arch/powerpc/kvm/book3s_hv_interrupt.c | 14 ++++++--
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 34 ------------------
arch/powerpc/mm/book3s64/radix_pgtable.c | 27 +++++---------
arch/powerpc/mm/book3s64/radix_tlb.c | 46 ------------------------
arch/powerpc/mm/mmu_context.c | 4 +--
7 files changed, 28 insertions(+), 113 deletions(-)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 652ce85f9410..bb5c7e5e142e 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -122,12 +122,6 @@ static inline bool need_extra_context(struct mm_struct *mm, unsigned long ea)
}
#endif
-#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU)
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm);
-#else
-static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { }
-#endif
-
extern void switch_cop(struct mm_struct *next);
extern int use_cop(unsigned long acop, struct mm_struct *mm);
extern void drop_cop(unsigned long acop, struct mm_struct *mm);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ad16331c3370..c3064075f1d7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -806,6 +806,10 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
/* KVM does not support mflags=2 (AIL=2) */
if (mflags != 0 && mflags != 3)
return H_UNSUPPORTED_FLAG_START;
+ /* Prefetch bug */
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG) &&
+ kvmhv_vcpu_is_radix(vcpu) && mflags == 3)
+ return H_UNSUPPORTED_FLAG_START;
return H_TOO_HARD;
default:
return H_TOO_HARD;
@@ -4286,8 +4290,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
* The TLB prefetch bug fixup is only in the kvmppc_run_vcpu
* path, which also handles hash and dependent threads mode.
*/
- if (kvm->arch.threads_indep && kvm_is_radix(kvm) &&
- !cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ if (kvm->arch.threads_indep && kvm_is_radix(kvm))
r = kvmhv_run_single_vcpu(vcpu, ~(u64)0,
vcpu->arch.vcore->lpcr);
else
@@ -4914,6 +4917,9 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
if (!indep_threads_mode && !cpu_has_feature(CPU_FTR_HVMODE)) {
pr_warn("KVM: Ignoring indep_threads_mode=N in nested hypervisor\n");
kvm->arch.threads_indep = true;
+ } else if (!indep_threads_mode && cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
+ pr_warn("KVM: Ignoring indep_threads_mode=N on pre-DD2.2 POWER9\n");
+ kvm->arch.threads_indep = true;
} else {
kvm->arch.threads_indep = indep_threads_mode;
}
diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
index b93d861d8538..9784da3f8565 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupt.c
+++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
@@ -223,6 +223,9 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
mtspr(SPRN_AMOR, ~0UL);
+ if (cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(msr & ~(MSR_IR|MSR_DR|MSR_RI), 0);
+
switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
/*
@@ -231,7 +234,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
*/
mtspr(SPRN_HDEC, hdec);
- __mtmsrd(0, 1); /* clear RI */
+ if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
+ __mtmsrd(0, 1); /* clear RI */
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
@@ -338,8 +342,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
radix_clear_slb();
- __mtmsrd(msr, 0);
-
accumulate_time(vcpu, &vcpu->arch.rm_exit);
/* Advance host PURR/SPURR by the amount used by guest */
@@ -406,6 +408,12 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
switch_mmu_to_host_radix(kvm, host_pidr);
+ /*
+ * If we are in real mode, don't switch MMU on until the MMU is
+ * switched to host, to avoid the P9 radix prefetch bug.
+ */
+ __mtmsrd(msr, 0);
+
end_timing(vcpu);
return trap;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6118e8a97ddd..61f71a7df238 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1710,40 +1710,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
eieio
tlbsync
ptesync
-
-BEGIN_FTR_SECTION
- /* Radix: Handle the case where the guest used an illegal PID */
- LOAD_REG_ADDR(r4, mmu_base_pid)
- lwz r3, VCPU_GUEST_PID(r9)
- lwz r5, 0(r4)
- cmpw cr0,r3,r5
- blt 2f
-
- /*
- * Illegal PID, the HW might have prefetched and cached in the TLB
- * some translations for the LPID 0 / guest PID combination which
- * Linux doesn't know about, so we need to flush that PID out of
- * the TLB. First we need to set LPIDR to 0 so tlbiel applies to
- * the right context.
- */
- li r0,0
- mtspr SPRN_LPID,r0
- isync
-
- /* Then do a congruence class local flush */
- ld r6,VCPU_KVM(r9)
- lwz r0,KVM_TLB_SETS(r6)
- mtctr r0
- li r7,0x400 /* IS field = 0b01 */
- ptesync
- sldi r0,r3,32 /* RS has PID */
-1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */
- addi r7,r7,0x1000
- bdnz 1b
- ptesync
-END_FTR_SECTION_IFSET(CPU_FTR_P9_RADIX_PREFETCH_BUG)
-
-2:
#endif /* CONFIG_PPC_RADIX_MMU */
/*
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 98f0b243c1ab..1ea95891a79e 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -357,30 +357,19 @@ static void __init radix_init_pgtable(void)
}
/* Find out how many PID bits are supported */
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
- mmu_base_pid = 1;
- } else if (cpu_has_feature(CPU_FTR_HVMODE)) {
- if (!mmu_pid_bits)
- mmu_pid_bits = 20;
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ if (!cpu_has_feature(CPU_FTR_HVMODE) &&
+ cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG)) {
/*
- * When KVM is possible, we only use the top half of the
- * PID space to avoid collisions between host and guest PIDs
- * which can cause problems due to prefetch when exiting the
- * guest with AIL=3
+ * Older versions of KVM on these machines prefer if the
+ * guest only uses the low 19 PID bits.
*/
- mmu_base_pid = 1 << (mmu_pid_bits - 1);
-#else
- mmu_base_pid = 1;
-#endif
- } else {
- /* The guest uses the bottom half of the PID space */
if (!mmu_pid_bits)
mmu_pid_bits = 19;
- mmu_base_pid = 1;
+ } else {
+ if (!mmu_pid_bits)
+ mmu_pid_bits = 20;
}
+ mmu_base_pid = 1;
/*
* Allocate Partition table and process table for the
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index 409e61210789..312236a6b085 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -1336,49 +1336,3 @@ void radix__flush_tlb_all(void)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(0) : "memory");
asm volatile("eieio; tlbsync; ptesync": : :"memory");
}
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
-{
- unsigned long pid = mm->context.id;
-
- if (unlikely(pid == MMU_NO_CONTEXT))
- return;
-
- if (!cpu_has_feature(CPU_FTR_P9_RADIX_PREFETCH_BUG))
- return;
-
- /*
- * If this context hasn't run on that CPU before and KVM is
- * around, there's a slim chance that the guest on another
- * CPU just brought in obsolete translation into the TLB of
- * this CPU due to a bad prefetch using the guest PID on
- * the way into the hypervisor.
- *
- * We work around this here. If KVM is possible, we check if
- * any sibling thread is in KVM. If it is, the window may exist
- * and thus we flush that PID from the core.
- *
- * A potential future improvement would be to mark which PIDs
- * have never been used on the system and avoid it if the PID
- * is new and the process has no other cpumask bit set.
- */
- if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) {
- int cpu = smp_processor_id();
- int sib = cpu_first_thread_sibling(cpu);
- bool flush = false;
-
- for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
- if (sib == cpu)
- continue;
- if (!cpu_possible(sib))
- continue;
- if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
- flush = true;
- }
- if (flush)
- _tlbiel_pid(pid, RIC_FLUSH_ALL);
- }
-}
-EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround);
-#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 18f20da0d348..7479d39976c9 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -81,9 +81,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
if (cpu_has_feature(CPU_FTR_ALTIVEC))
asm volatile ("dssall");
- if (new_on_cpu)
- radix_kvm_prefetch_workaround(next);
- else
+ if (!new_on_cpu)
membarrier_arch_switch_mm(prev, next, tsk);
/*
--
2.23.0
^ permalink raw reply related
* [PATCH v2 29/37] KVM: PPC: Book3S HV P9: Switch to guest MMU context as late as possible
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
Move WARN_ON traps early so they are less likely to get tangled
on CPU switching to guest. Move MMU context switch as late as
reasonably possible to minimise code running with guest context
switched in. This becomes more important when this code may run
in real-mode, with later changes.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv_interrupt.c | 40 +++++++++++++-------------
arch/powerpc/kvm/book3s_hv_nested.c | 1 +
2 files changed, 21 insertions(+), 20 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
index dd0a78a69f49..b93d861d8538 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupt.c
+++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
@@ -143,8 +143,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
if (hdec < 0)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
+ WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
+ WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
+
start_timing(vcpu, &vcpu->arch.rm_entry);
+ vcpu->arch.ceded = 0;
+
if (vc->tb_offset) {
u64 new_tb = tb + vc->tb_offset;
mtspr(SPRN_TBU40, new_tb);
@@ -193,26 +198,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
- mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
- mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
- mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
- mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
-
- mtspr(SPRN_AMOR, ~0UL);
-
- switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
-
- /*
- * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
- * so set guest LPCR (with HDICE) before writing HDEC.
- */
- mtspr(SPRN_HDEC, hdec);
-
- vcpu->arch.ceded = 0;
-
- WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
- WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
-
mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
@@ -231,6 +216,21 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
*/
mtspr(SPRN_HDSISR, HDSISR_CANARY);
+ mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+ mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+ mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+ mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+ mtspr(SPRN_AMOR, ~0UL);
+
+ switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+
+ /*
+ * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
+ * so set guest LPCR (with HDICE) before writing HDEC.
+ */
+ mtspr(SPRN_HDEC, hdec);
+
__mtmsrd(0, 1); /* clear RI */
mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c
index 0cd0e7aad588..cdf3ee2145ab 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -323,6 +323,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
mask = LPCR_DPFD | LPCR_ILE | LPCR_TC | LPCR_AIL | LPCR_LD |
LPCR_LPES | LPCR_MER;
+ /* XXX: set lpcr in sanitise hv regs? Why is it plumbed through? */
lpcr = (vc->lpcr & ~mask) | (l2_hv.lpcr & mask);
sanitise_hv_regs(vcpu, &l2_hv);
restore_hv_regs(vcpu, &l2_hv);
--
2.23.0
^ permalink raw reply related
* [PATCH v2 28/37] KVM: PPC: Book3S HV P9: Add helpers for OS SPR handling
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
This is a first step to wrapping supervisor and user SPR saving and
loading up into helpers, which will then be called independently in
bare metal and nested HV cases in order to optimise SPR access.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 131 ++++++++++++++++++++++-------------
1 file changed, 84 insertions(+), 47 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 94989fe2fdfe..ad16331c3370 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3442,6 +3442,84 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
}
+static void load_spr_state(struct kvm_vcpu *vcpu)
+{
+ mtspr(SPRN_DSCR, vcpu->arch.dscr);
+ mtspr(SPRN_IAMR, vcpu->arch.iamr);
+ mtspr(SPRN_PSPB, vcpu->arch.pspb);
+ mtspr(SPRN_FSCR, vcpu->arch.fscr);
+ mtspr(SPRN_TAR, vcpu->arch.tar);
+ mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
+ mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
+ mtspr(SPRN_BESCR, vcpu->arch.bescr);
+ mtspr(SPRN_WORT, vcpu->arch.wort);
+ mtspr(SPRN_TIDR, vcpu->arch.tid);
+ /* XXX: DAR, DSISR must be set with MSR[RI] clear (or hstate as appropriate) */
+ mtspr(SPRN_AMR, vcpu->arch.amr);
+ mtspr(SPRN_UAMOR, vcpu->arch.uamor);
+
+ if (!(vcpu->arch.ctrl & 1))
+ mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+}
+
+static void store_spr_state(struct kvm_vcpu *vcpu)
+{
+ vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
+
+ vcpu->arch.iamr = mfspr(SPRN_IAMR);
+ vcpu->arch.pspb = mfspr(SPRN_PSPB);
+ vcpu->arch.fscr = mfspr(SPRN_FSCR);
+ vcpu->arch.tar = mfspr(SPRN_TAR);
+ vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
+ vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
+ vcpu->arch.bescr = mfspr(SPRN_BESCR);
+ vcpu->arch.wort = mfspr(SPRN_WORT);
+ vcpu->arch.tid = mfspr(SPRN_TIDR);
+ vcpu->arch.amr = mfspr(SPRN_AMR);
+ vcpu->arch.uamor = mfspr(SPRN_UAMOR);
+ vcpu->arch.dscr = mfspr(SPRN_DSCR);
+}
+
+/*
+ * Privileged (non-hypervisor) host registers to save.
+ */
+struct p9_host_os_sprs {
+ unsigned long dscr;
+ unsigned long tidr;
+ unsigned long iamr;
+ unsigned long amr;
+ unsigned long fscr;
+};
+
+static void save_p9_host_os_sprs(struct p9_host_os_sprs *host_os_sprs)
+{
+ host_os_sprs->dscr = mfspr(SPRN_DSCR);
+ host_os_sprs->tidr = mfspr(SPRN_TIDR);
+ host_os_sprs->iamr = mfspr(SPRN_IAMR);
+ host_os_sprs->amr = mfspr(SPRN_AMR);
+ host_os_sprs->fscr = mfspr(SPRN_FSCR);
+}
+
+/* vcpu guest regs must already be saved */
+static void restore_p9_host_os_sprs(struct kvm_vcpu *vcpu,
+ struct p9_host_os_sprs *host_os_sprs)
+{
+ mtspr(SPRN_PSPB, 0);
+ mtspr(SPRN_WORT, 0);
+ mtspr(SPRN_UAMOR, 0);
+ mtspr(SPRN_PSPB, 0);
+
+ mtspr(SPRN_DSCR, host_os_sprs->dscr);
+ mtspr(SPRN_TIDR, host_os_sprs->tidr);
+ mtspr(SPRN_IAMR, host_os_sprs->iamr);
+
+ if (host_os_sprs->amr != vcpu->arch.amr)
+ mtspr(SPRN_AMR, host_os_sprs->amr);
+
+ if (host_os_sprs->fscr != vcpu->arch.fscr)
+ mtspr(SPRN_FSCR, host_os_sprs->fscr);
+}
+
/*
* Virtual-mode guest entry for POWER9 and later when the host and
* guest are both using the radix MMU. The LPIDR has already been set.
@@ -3450,11 +3528,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long lpcr)
{
struct kvmppc_vcore *vc = vcpu->arch.vcore;
- unsigned long host_dscr = mfspr(SPRN_DSCR);
- unsigned long host_tidr = mfspr(SPRN_TIDR);
- unsigned long host_iamr = mfspr(SPRN_IAMR);
- unsigned long host_amr = mfspr(SPRN_AMR);
- unsigned long host_fscr = mfspr(SPRN_FSCR);
+ struct p9_host_os_sprs host_os_sprs;
s64 dec;
u64 tb, next_timer;
int trap, save_pmu;
@@ -3469,6 +3543,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.ceded = 0;
+ save_p9_host_os_sprs(&host_os_sprs);
+
kvmhv_save_host_pmu(); /* saves it to PACA kvm_hstate */
kvmppc_subcore_enter_guest();
@@ -3496,22 +3572,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
#endif
mtspr(SPRN_VRSAVE, vcpu->arch.vrsave);
- mtspr(SPRN_DSCR, vcpu->arch.dscr);
- mtspr(SPRN_IAMR, vcpu->arch.iamr);
- mtspr(SPRN_PSPB, vcpu->arch.pspb);
- mtspr(SPRN_FSCR, vcpu->arch.fscr);
- mtspr(SPRN_TAR, vcpu->arch.tar);
- mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
- mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
- mtspr(SPRN_BESCR, vcpu->arch.bescr);
- mtspr(SPRN_WORT, vcpu->arch.wort);
- mtspr(SPRN_TIDR, vcpu->arch.tid);
- /* XXX: DAR, DSISR must be set with MSR[RI] clear (or hstate as appropriate) */
- mtspr(SPRN_AMR, vcpu->arch.amr);
- mtspr(SPRN_UAMOR, vcpu->arch.uamor);
-
- if (!(vcpu->arch.ctrl & 1))
- mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+ load_spr_state(vcpu);
/*
* XXX: must always deal with irq_work_raise via NMI vs setting DEC.
@@ -3605,34 +3666,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
vcpu->arch.thread_cpu = -1;
- vcpu->arch.ctrl = mfspr(SPRN_CTRLF);
-
- vcpu->arch.iamr = mfspr(SPRN_IAMR);
- vcpu->arch.pspb = mfspr(SPRN_PSPB);
- vcpu->arch.fscr = mfspr(SPRN_FSCR);
- vcpu->arch.tar = mfspr(SPRN_TAR);
- vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
- vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
- vcpu->arch.bescr = mfspr(SPRN_BESCR);
- vcpu->arch.wort = mfspr(SPRN_WORT);
- vcpu->arch.tid = mfspr(SPRN_TIDR);
- vcpu->arch.amr = mfspr(SPRN_AMR);
- vcpu->arch.uamor = mfspr(SPRN_UAMOR);
- vcpu->arch.dscr = mfspr(SPRN_DSCR);
-
- mtspr(SPRN_PSPB, 0);
- mtspr(SPRN_WORT, 0);
- mtspr(SPRN_UAMOR, 0);
- mtspr(SPRN_DSCR, host_dscr);
- mtspr(SPRN_TIDR, host_tidr);
- mtspr(SPRN_IAMR, host_iamr);
- mtspr(SPRN_PSPB, 0);
- if (host_amr != vcpu->arch.amr)
- mtspr(SPRN_AMR, host_amr);
+ restore_p9_host_os_sprs(vcpu, &host_os_sprs);
- if (host_fscr != vcpu->arch.fscr)
- mtspr(SPRN_FSCR, host_fscr);
+ store_spr_state(vcpu);
msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
store_fp_state(&vcpu->arch.fp);
--
2.23.0
^ permalink raw reply related
* [PATCH v2 27/37] KVM: PPC: Book3S HV P9: Move SPR loading after expiry time check
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
This is wasted work if the time limit is exceeded.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv_interrupt.c | 38 ++++++++++++++++----------
1 file changed, 23 insertions(+), 15 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
index 4a158c8fc0bc..dd0a78a69f49 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupt.c
+++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
@@ -126,22 +126,17 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
s64 hdec;
u64 tb, purr, spurr;
u64 *exsave;
- bool ri_clear;
- unsigned long msr = mfmsr();
int trap;
- unsigned long host_hfscr = mfspr(SPRN_HFSCR);
- unsigned long host_ciabr = mfspr(SPRN_CIABR);
- unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
- unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
- unsigned long host_psscr = mfspr(SPRN_PSSCR);
- unsigned long host_pidr = mfspr(SPRN_PID);
- unsigned long host_dawr1 = 0;
- unsigned long host_dawrx1 = 0;
-
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- host_dawr1 = mfspr(SPRN_DAWR1);
- host_dawrx1 = mfspr(SPRN_DAWRX1);
- }
+ bool ri_clear;
+ unsigned long msr;
+ unsigned long host_hfscr;
+ unsigned long host_ciabr;
+ unsigned long host_dawr0;
+ unsigned long host_dawrx0;
+ unsigned long host_psscr;
+ unsigned long host_pidr;
+ unsigned long host_dawr1;
+ unsigned long host_dawrx1;
tb = mftb();
hdec = time_limit - tb;
@@ -159,6 +154,19 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
vc->tb_offset_applied = vc->tb_offset;
}
+ msr = mfmsr();
+
+ host_hfscr = mfspr(SPRN_HFSCR);
+ host_ciabr = mfspr(SPRN_CIABR);
+ host_dawr0 = mfspr(SPRN_DAWR0);
+ host_dawrx0 = mfspr(SPRN_DAWRX0);
+ host_psscr = mfspr(SPRN_PSSCR);
+ host_pidr = mfspr(SPRN_PID);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ host_dawr1 = mfspr(SPRN_DAWR1);
+ host_dawrx1 = mfspr(SPRN_DAWRX1);
+ }
+
if (vc->pcr)
mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
mtspr(SPRN_DPDES, vc->dpdes);
--
2.23.0
^ permalink raw reply related
* [PATCH v2 26/37] KVM: PPC: Book3S HV P9: Improve exit timing accounting coverage
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
The C conversion caused exit timing to become a bit cramped. Expand it
to cover more of the entry and exit code.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv_interrupt.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
index f5fef7398e37..4a158c8fc0bc 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupt.c
+++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
@@ -148,6 +148,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
if (hdec < 0)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
+ start_timing(vcpu, &vcpu->arch.rm_entry);
+
if (vc->tb_offset) {
u64 new_tb = tb + vc->tb_offset;
mtspr(SPRN_TBU40, new_tb);
@@ -198,8 +200,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
*/
mtspr(SPRN_HDEC, hdec);
- start_timing(vcpu, &vcpu->arch.rm_entry);
-
vcpu->arch.ceded = 0;
WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
@@ -334,8 +334,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
accumulate_time(vcpu, &vcpu->arch.rm_exit);
- end_timing(vcpu);
-
/* Advance host PURR/SPURR by the amount used by guest */
purr = mfspr(SPRN_PURR);
spurr = mfspr(SPRN_SPURR);
@@ -400,6 +398,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
switch_mmu_to_host_radix(kvm, host_pidr);
+ end_timing(vcpu);
+
return trap;
}
EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);
--
2.23.0
^ permalink raw reply related
* [PATCH v2 25/37] KVM: PPC: Book3S HV P9: Read machine check registers while MSR[RI] is 0
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
SRR0/1, DAR, DSISR must all be protected from machine check which can
clobber them. Ensure MSR[RI] is clear while they are live.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 5 +++--
arch/powerpc/kvm/book3s_hv_interrupt.c | 26 +++++++++++++++++++++++---
arch/powerpc/kvm/book3s_hv_ras.c | 5 +++++
3 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index f99503acdda5..94989fe2fdfe 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3506,8 +3506,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
mtspr(SPRN_BESCR, vcpu->arch.bescr);
mtspr(SPRN_WORT, vcpu->arch.wort);
mtspr(SPRN_TIDR, vcpu->arch.tid);
- mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
- mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+ /* XXX: DAR, DSISR must be set with MSR[RI] clear (or hstate as appropriate) */
mtspr(SPRN_AMR, vcpu->arch.amr);
mtspr(SPRN_UAMOR, vcpu->arch.uamor);
@@ -3553,6 +3552,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
hvregs.vcpu_token = vcpu->vcpu_id;
}
hvregs.hdec_expiry = time_limit;
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
__pa(&vcpu->arch.regs));
kvmhv_restore_hv_return_state(vcpu, &hvregs);
diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
index dea3eca3648a..f5fef7398e37 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupt.c
+++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
@@ -126,6 +126,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
s64 hdec;
u64 tb, purr, spurr;
u64 *exsave;
+ bool ri_clear;
unsigned long msr = mfmsr();
int trap;
unsigned long host_hfscr = mfspr(SPRN_HFSCR);
@@ -197,9 +198,6 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
*/
mtspr(SPRN_HDEC, hdec);
- mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
- mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
-
start_timing(vcpu, &vcpu->arch.rm_entry);
vcpu->arch.ceded = 0;
@@ -225,6 +223,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
*/
mtspr(SPRN_HDSISR, HDSISR_CANARY);
+ __mtmsrd(0, 1); /* clear RI */
+
+ mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
+ mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
+ mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+ mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
+
accumulate_time(vcpu, &vcpu->arch.guest_time);
local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST_HV_FAST;
@@ -240,6 +245,13 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+ /* HSRR interrupts leave MSR[RI] unchanged, SRR interrupts clear it. */
+ if ((local_paca->kvm_hstate.scratch0 & 0x2) &&
+ (vcpu->arch.shregs.msr & MSR_RI))
+ ri_clear = false;
+ else
+ ri_clear = true;
+
trap = local_paca->kvm_hstate.scratch0 & ~0x2;
if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
exsave = local_paca->exgen;
@@ -251,6 +263,14 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
+
+ if (ri_clear) {
+/// XXX this fires maybe on syscalls on mambo WARN_ON((mfmsr() & MSR_RI));
+ __mtmsrd(MSR_RI, 1); /* set RI after reading machine check regs (DAR, DSISR, SRR0/1) and hstate scratch (which we need to move into exsave) */
+ } else {
+ WARN_ON(!(mfmsr() & MSR_RI));
+ }
+
vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index d4bca93b79f6..7a645f4428c2 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -198,6 +198,7 @@ static void kvmppc_tb_resync_done(void)
* value. Hence the idea is to resync the TB on every HMI, so that we
* know about the exact state of the TB value. Resync TB call will
* restore TB to host timebase.
+ * XXX: could use new opal hmi handler flags for this
*
* Things to consider:
* - On TB error, HMI interrupt is reported on all the threads of the core
@@ -290,6 +291,10 @@ long kvmppc_realmode_hmi_handler(void)
*/
wait_for_subcore_guest_exit();
+ /*
+ * XXX: Is this safe with independent threads mode?
+ */
+
/*
* At this point we are sure that primary threads from each
* subcore on this core have completed guest->host partition
--
2.23.0
^ permalink raw reply related
* [PATCH v2 24/37] KVM: PPC: Book3S HV P9: inline kvmhv_load_hv_regs_and_go into __kvmhv_vcpu_entry_p9
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
Now the initial C implementation is done, inline more HV code to make
rearranging things easier.
And rename __kvmhv_vcpu_entry_p9 to drop the leading underscores as it's
now C, and is now a more complete vcpu entry.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/kvm_book3s_64.h | 2 +-
arch/powerpc/kvm/book3s_hv.c | 181 +----------------------
arch/powerpc/kvm/book3s_hv_interrupt.c | 168 ++++++++++++++++++++-
3 files changed, 169 insertions(+), 182 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index c214bcffb441..eaf3a562bf1e 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -153,7 +153,7 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
return radix;
}
-int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr);
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 28a2761515e3..f99503acdda5 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3442,183 +3442,6 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
trace_kvmppc_run_core(vc, 1);
}
-static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
-{
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
- struct kvm_nested_guest *nested = vcpu->arch.nested;
- u32 lpid;
-
- lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
-
- mtspr(SPRN_LPID, lpid);
- mtspr(SPRN_LPCR, lpcr);
- mtspr(SPRN_PID, vcpu->arch.pid);
- isync();
-
- /* TLBIEL must have LPIDR set, so set guest LPID before flushing. */
- kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
-}
-
-static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
-{
- mtspr(SPRN_PID, pid);
- mtspr(SPRN_LPID, kvm->arch.host_lpid);
- mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
- isync();
-}
-
-/*
- * Load up hypervisor-mode registers on P9.
- */
-static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
- unsigned long lpcr)
-{
- struct kvm *kvm = vcpu->kvm;
- struct kvmppc_vcore *vc = vcpu->arch.vcore;
- s64 hdec;
- u64 tb, purr, spurr;
- int trap;
- unsigned long host_hfscr = mfspr(SPRN_HFSCR);
- unsigned long host_ciabr = mfspr(SPRN_CIABR);
- unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
- unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
- unsigned long host_psscr = mfspr(SPRN_PSSCR);
- unsigned long host_pidr = mfspr(SPRN_PID);
- unsigned long host_dawr1 = 0;
- unsigned long host_dawrx1 = 0;
-
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- host_dawr1 = mfspr(SPRN_DAWR1);
- host_dawrx1 = mfspr(SPRN_DAWRX1);
- }
-
- tb = mftb();
- hdec = time_limit - tb;
- if (hdec < 0)
- return BOOK3S_INTERRUPT_HV_DECREMENTER;
-
- if (vc->tb_offset) {
- u64 new_tb = tb + vc->tb_offset;
- mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
- vc->tb_offset_applied = vc->tb_offset;
- }
-
- if (vc->pcr)
- mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
- mtspr(SPRN_DPDES, vc->dpdes);
- mtspr(SPRN_VTB, vc->vtb);
-
- local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
- local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
- mtspr(SPRN_PURR, vcpu->arch.purr);
- mtspr(SPRN_SPURR, vcpu->arch.spurr);
-
- if (dawr_enabled()) {
- mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
- mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
- mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
- }
- }
- mtspr(SPRN_CIABR, vcpu->arch.ciabr);
- mtspr(SPRN_IC, vcpu->arch.ic);
-
- mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
-
- mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
-
- mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
- mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
- mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
- mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
-
- mtspr(SPRN_AMOR, ~0UL);
-
- switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
-
- /*
- * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
- * so set guest LPCR (with HDICE) before writing HDEC.
- */
- mtspr(SPRN_HDEC, hdec);
-
- mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
- mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
-
- trap = __kvmhv_vcpu_entry_p9(vcpu);
-
- /* Advance host PURR/SPURR by the amount used by guest */
- purr = mfspr(SPRN_PURR);
- spurr = mfspr(SPRN_SPURR);
- mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
- purr - vcpu->arch.purr);
- mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
- spurr - vcpu->arch.spurr);
- vcpu->arch.purr = purr;
- vcpu->arch.spurr = spurr;
-
- vcpu->arch.ic = mfspr(SPRN_IC);
- vcpu->arch.pid = mfspr(SPRN_PID);
- vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
-
- vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
- vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
- vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
- vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
-
- /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
- mtspr(SPRN_PSSCR, host_psscr |
- (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
- mtspr(SPRN_HFSCR, host_hfscr);
- mtspr(SPRN_CIABR, host_ciabr);
- mtspr(SPRN_DAWR0, host_dawr0);
- mtspr(SPRN_DAWRX0, host_dawrx0);
- if (cpu_has_feature(CPU_FTR_DAWR1)) {
- mtspr(SPRN_DAWR1, host_dawr1);
- mtspr(SPRN_DAWRX1, host_dawrx1);
- }
-
- /*
- * Since this is radix, do a eieio; tlbsync; ptesync sequence in
- * case we interrupted the guest between a tlbie and a ptesync.
- */
- asm volatile("eieio; tlbsync; ptesync");
-
- /*
- * cp_abort is required if the processor supports local copy-paste
- * to clear the copy buffer that was under control of the guest.
- */
- if (cpu_has_feature(CPU_FTR_ARCH_31))
- asm volatile(PPC_CP_ABORT);
-
- vc->dpdes = mfspr(SPRN_DPDES);
- vc->vtb = mfspr(SPRN_VTB);
- mtspr(SPRN_DPDES, 0);
- if (vc->pcr)
- mtspr(SPRN_PCR, PCR_MASK);
-
- if (vc->tb_offset_applied) {
- u64 new_tb = mftb() - vc->tb_offset_applied;
- mtspr(SPRN_TBU40, new_tb);
- tb = mftb();
- if ((tb & 0xffffff) < (new_tb & 0xffffff))
- mtspr(SPRN_TBU40, new_tb + 0x1000000);
- vc->tb_offset_applied = 0;
- }
-
- /* HDEC must be at least as large as DEC, so decrementer_max fits */
- mtspr(SPRN_HDEC, decrementer_max);
-
- switch_mmu_to_host_radix(kvm, host_pidr);
-
- return trap;
-}
-
/*
* Virtual-mode guest entry for POWER9 and later when the host and
* guest are both using the radix MMU. The LPIDR has already been set.
@@ -3710,7 +3533,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
* We need to save and restore the guest visible part of the
* psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
* doesn't do this for us. Note only required if pseries since
- * this is done in kvmhv_load_hv_regs_and_go() below otherwise.
+ * this is done in kvmhv_vcpu_entry_p9() below otherwise.
*/
unsigned long host_psscr;
/* call our hypervisor to load up HV regs and go */
@@ -3748,7 +3571,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
} else {
kvmppc_xive_push_vcpu(vcpu);
- trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+ trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
/* H_CEDE has to be handled now, not later */
/* XICS hcalls must be handled before xive is pulled */
if (trap == BOOK3S_INTERRUPT_SYSCALL &&
diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
index 5a7b036c447f..dea3eca3648a 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupt.c
+++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
@@ -55,6 +55,31 @@ static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator
#define accumulate_time(vcpu, next) do {} while (0)
#endif
+static void switch_mmu_to_guest_radix(struct kvm *kvm, struct kvm_vcpu *vcpu, u64 lpcr)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ struct kvm_nested_guest *nested = vcpu->arch.nested;
+ u32 lpid;
+
+ lpid = nested ? nested->shadow_lpid : kvm->arch.lpid;
+
+ mtspr(SPRN_LPID, lpid);
+ mtspr(SPRN_LPCR, lpcr);
+ mtspr(SPRN_PID, vcpu->arch.pid);
+ isync();
+
+ /* TLBIEL must have LPIDR set, so set guest LPID before flushing. */
+ kvmppc_check_need_tlb_flush(kvm, vc->pcpu, nested);
+}
+
+static void switch_mmu_to_host_radix(struct kvm *kvm, u32 pid)
+{
+ mtspr(SPRN_PID, pid);
+ mtspr(SPRN_LPID, kvm->arch.host_lpid);
+ mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
+ isync();
+}
+
static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
{
asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
@@ -94,11 +119,86 @@ static void radix_clear_slb(void)
}
}
-int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu)
+int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpcr)
{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ s64 hdec;
+ u64 tb, purr, spurr;
u64 *exsave;
unsigned long msr = mfmsr();
int trap;
+ unsigned long host_hfscr = mfspr(SPRN_HFSCR);
+ unsigned long host_ciabr = mfspr(SPRN_CIABR);
+ unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
+ unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
+ unsigned long host_psscr = mfspr(SPRN_PSSCR);
+ unsigned long host_pidr = mfspr(SPRN_PID);
+ unsigned long host_dawr1 = 0;
+ unsigned long host_dawrx1 = 0;
+
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ host_dawr1 = mfspr(SPRN_DAWR1);
+ host_dawrx1 = mfspr(SPRN_DAWRX1);
+ }
+
+ tb = mftb();
+ hdec = time_limit - tb;
+ if (hdec < 0)
+ return BOOK3S_INTERRUPT_HV_DECREMENTER;
+
+ if (vc->tb_offset) {
+ u64 new_tb = tb + vc->tb_offset;
+ mtspr(SPRN_TBU40, new_tb);
+ tb = mftb();
+ if ((tb & 0xffffff) < (new_tb & 0xffffff))
+ mtspr(SPRN_TBU40, new_tb + 0x1000000);
+ vc->tb_offset_applied = vc->tb_offset;
+ }
+
+ if (vc->pcr)
+ mtspr(SPRN_PCR, vc->pcr | PCR_MASK);
+ mtspr(SPRN_DPDES, vc->dpdes);
+ mtspr(SPRN_VTB, vc->vtb);
+
+ local_paca->kvm_hstate.host_purr = mfspr(SPRN_PURR);
+ local_paca->kvm_hstate.host_spurr = mfspr(SPRN_SPURR);
+ mtspr(SPRN_PURR, vcpu->arch.purr);
+ mtspr(SPRN_SPURR, vcpu->arch.spurr);
+
+ if (dawr_enabled()) {
+ mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+ mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ mtspr(SPRN_DAWR1, vcpu->arch.dawr1);
+ mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1);
+ }
+ }
+ mtspr(SPRN_CIABR, vcpu->arch.ciabr);
+ mtspr(SPRN_IC, vcpu->arch.ic);
+
+ mtspr(SPRN_PSSCR, vcpu->arch.psscr | PSSCR_EC |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+
+ mtspr(SPRN_HFSCR, vcpu->arch.hfscr);
+
+ mtspr(SPRN_SPRG0, vcpu->arch.shregs.sprg0);
+ mtspr(SPRN_SPRG1, vcpu->arch.shregs.sprg1);
+ mtspr(SPRN_SPRG2, vcpu->arch.shregs.sprg2);
+ mtspr(SPRN_SPRG3, vcpu->arch.shregs.sprg3);
+
+ mtspr(SPRN_AMOR, ~0UL);
+
+ switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+
+ /*
+ * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
+ * so set guest LPCR (with HDICE) before writing HDEC.
+ */
+ mtspr(SPRN_HDEC, hdec);
+
+ mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
+ mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
start_timing(vcpu, &vcpu->arch.rm_entry);
@@ -216,6 +316,70 @@ int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu)
end_timing(vcpu);
+ /* Advance host PURR/SPURR by the amount used by guest */
+ purr = mfspr(SPRN_PURR);
+ spurr = mfspr(SPRN_SPURR);
+ mtspr(SPRN_PURR, local_paca->kvm_hstate.host_purr +
+ purr - vcpu->arch.purr);
+ mtspr(SPRN_SPURR, local_paca->kvm_hstate.host_spurr +
+ spurr - vcpu->arch.spurr);
+ vcpu->arch.purr = purr;
+ vcpu->arch.spurr = spurr;
+
+ vcpu->arch.ic = mfspr(SPRN_IC);
+ vcpu->arch.pid = mfspr(SPRN_PID);
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
+
+ vcpu->arch.shregs.sprg0 = mfspr(SPRN_SPRG0);
+ vcpu->arch.shregs.sprg1 = mfspr(SPRN_SPRG1);
+ vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
+ vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
+
+ /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
+ mtspr(SPRN_PSSCR, host_psscr |
+ (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
+ mtspr(SPRN_HFSCR, host_hfscr);
+ mtspr(SPRN_CIABR, host_ciabr);
+ mtspr(SPRN_DAWR0, host_dawr0);
+ mtspr(SPRN_DAWRX0, host_dawrx0);
+ if (cpu_has_feature(CPU_FTR_DAWR1)) {
+ mtspr(SPRN_DAWR1, host_dawr1);
+ mtspr(SPRN_DAWRX1, host_dawrx1);
+ }
+
+ /*
+ * Since this is radix, do a eieio; tlbsync; ptesync sequence in
+ * case we interrupted the guest between a tlbie and a ptesync.
+ */
+ asm volatile("eieio; tlbsync; ptesync");
+
+ /*
+ * cp_abort is required if the processor supports local copy-paste
+ * to clear the copy buffer that was under control of the guest.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_31))
+ asm volatile(PPC_CP_ABORT);
+
+ vc->dpdes = mfspr(SPRN_DPDES);
+ vc->vtb = mfspr(SPRN_VTB);
+ mtspr(SPRN_DPDES, 0);
+ if (vc->pcr)
+ mtspr(SPRN_PCR, PCR_MASK);
+
+ if (vc->tb_offset_applied) {
+ u64 new_tb = mftb() - vc->tb_offset_applied;
+ mtspr(SPRN_TBU40, new_tb);
+ tb = mftb();
+ if ((tb & 0xffffff) < (new_tb & 0xffffff))
+ mtspr(SPRN_TBU40, new_tb + 0x1000000);
+ vc->tb_offset_applied = 0;
+ }
+
+ /* HDEC must be at least as large as DEC, so decrementer_max fits */
+ mtspr(SPRN_HDEC, decrementer_max);
+
+ switch_mmu_to_host_radix(kvm, host_pidr);
+
return trap;
}
-EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9);
+EXPORT_SYMBOL_GPL(kvmhv_vcpu_entry_p9);
--
2.23.0
^ permalink raw reply related
* [PATCH v2 23/37] KVM: PPC: Book3S HV P9: Implement the rest of the P9 path in C
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
Almost all logic is moved to C, by introducing a new in_guest mode that
selects and branches very early in the interrupt handler to the P9 exit
code.
The remaining assembly is only about 160 lines of low level stack setup,
with VCPU vs host register save and restore, plus a small shim to the
legacy paths in the interrupt handler.
There are two motivations for this. The first is simply to make the code
more maintainable by having it in C. The second is to reduce the amount of code
running in a special KVM mode, "realmode". I put that in quotes because
with radix it is no longer necessarily real-mode in the MMU, but it
still has to be treated specially because it may be in real-mode, and
has various important registers like PID, DEC, TB, etc set to guest.
This is hostile to the rest of Linux and can't use arbitrary kernel
functionality or be instrumented well.
This initial patch is a reasonably faithful conversion of the asm code.
It does lack any loop to return quickly back into the guest without
switching out of realmode in the case of unimportant or easily handled
interrupts. As explained in the previous change, handling HV interrupts
in real mode is not so important for P9.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/asm-prototypes.h | 3 +-
arch/powerpc/include/asm/kvm_asm.h | 3 +-
arch/powerpc/include/asm/kvm_book3s_64.h | 8 +
arch/powerpc/kernel/security.c | 5 +-
arch/powerpc/kvm/Makefile | 3 +
arch/powerpc/kvm/book3s_64_entry.S | 181 ++++++++++++++++++
arch/powerpc/kvm/book3s_hv.c | 27 ++-
arch/powerpc/kvm/book3s_hv_interrupt.c | 221 ++++++++++++++++++++++
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 123 +-----------
arch/powerpc/kvm/book3s_xive.c | 34 ++++
10 files changed, 480 insertions(+), 128 deletions(-)
create mode 100644 arch/powerpc/kvm/book3s_hv_interrupt.c
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 939f3c94c8f3..8677d27929fe 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -122,6 +122,7 @@ extern s32 patch__call_flush_branch_caches3;
extern s32 patch__flush_count_cache_return;
extern s32 patch__flush_link_stack_return;
extern s32 patch__call_kvm_flush_link_stack;
+extern s32 patch__call_kvm_flush_link_stack_2;
extern s32 patch__memset_nocache, patch__memcpy_nocache;
extern long flush_branch_caches;
@@ -142,7 +143,7 @@ void kvmhv_load_host_pmu(void);
void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
-int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
long kvmppc_h_set_xdabr(struct kvm_vcpu *vcpu, unsigned long dabr,
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index a3633560493b..b4f9996bd331 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -146,7 +146,8 @@
#define KVM_GUEST_MODE_GUEST 1
#define KVM_GUEST_MODE_SKIP 2
#define KVM_GUEST_MODE_GUEST_HV 3
-#define KVM_GUEST_MODE_HOST_HV 4
+#define KVM_GUEST_MODE_GUEST_HV_FAST 4 /* ISA v3.0 with host radix mode */
+#define KVM_GUEST_MODE_HOST_HV 5
#define KVM_INST_FETCH_FAILED -1
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 9bb9bb370b53..c214bcffb441 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -153,9 +153,17 @@ static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu)
return radix;
}
+int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu);
+
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
#endif
+/*
+ * Invalid HDSISR value which is used to indicate when HW has not set the reg.
+ * Used to work around an errata.
+ */
+#define HDSISR_CANARY 0x7fff
+
/*
* We use a lock bit in HPTE dword 0 to synchronize updates and
* accesses to each HPTE, and another bit to indicate non-present
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index e4e1a94ccf6a..6c37aeed0650 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -430,16 +430,19 @@ device_initcall(stf_barrier_debugfs_init);
static void update_branch_cache_flush(void)
{
- u32 *site;
+ u32 *site, __maybe_unused *site2;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
site = &patch__call_kvm_flush_link_stack;
+ site2 = &patch__call_kvm_flush_link_stack_2;
// This controls the branch from guest_exit_cont to kvm_flush_link_stack
if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
patch_instruction_site(site, ppc_inst(PPC_INST_NOP));
+ patch_instruction_site(site2, ppc_inst(PPC_INST_NOP));
} else {
// Could use HW flush, but that could also flush count cache
patch_branch_site(site, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+ patch_branch_site(site2, (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
}
#endif
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index cdd119028f64..40a55a5ba4ff 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -43,6 +43,9 @@ kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs)
kvm-book3s_64-builtin-objs-$(CONFIG_SPAPR_TCE_IOMMU) := \
book3s_64_vio_hv.o
+kvm-book3s_64-builtin-objs-y += \
+ book3s_hv_interrupt.o
+
kvm-pr-y := \
fpu.o \
emulate.o \
diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S
index f826c8dc2e19..cc7b76865a16 100644
--- a/arch/powerpc/kvm/book3s_64_entry.S
+++ b/arch/powerpc/kvm/book3s_64_entry.S
@@ -1,10 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
#include <asm/asm-offsets.h>
#include <asm/cache.h>
+#include <asm/code-patching-asm.h>
#include <asm/exception-64s.h>
+#include <asm/export.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_book3s_asm.h>
#include <asm/ppc_asm.h>
#include <asm/reg.h>
+#include <asm/ultravisor-api.h>
/*
* These are branched to from interrupt handlers in exception-64s.S which set
@@ -13,13 +17,24 @@
.global kvmppc_hcall
.balign IFETCH_ALIGN_BYTES
kvmppc_hcall:
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,KVM_GUEST_MODE_GUEST_HV_FAST
+ beq kvmppc_p9_exit_hcall
ld r10,PACA_EXGEN+EX_R13(r13)
SET_SCRATCH0(r10)
li r10,0xc00
+ li r11,PACA_EXGEN
+ b 1f
.global kvmppc_interrupt
.balign IFETCH_ALIGN_BYTES
kvmppc_interrupt:
+ std r10,HSTATE_SCRATCH0(r13)
+ lbz r10,HSTATE_IN_GUEST(r13)
+ cmpwi r10,KVM_GUEST_MODE_GUEST_HV_FAST
+ beq kvmppc_p9_exit_interrupt
+ ld r10,HSTATE_SCRATCH0(r13)
+ lbz r11,HSTATE_IN_GUEST(r13)
li r11,PACA_EXGEN
cmpdi r10,0x200
bgt+ 1f
@@ -114,3 +129,169 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
GET_SCRATCH0(r13)
HRFI_TO_KERNEL
#endif
+
+/* Stack frame offsets for kvmppc_p9_enter_guest */
+#define SFS 208
+#define STACK_SLOT_VCPU (SFS-8)
+#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
+
+/*
+ * void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
+ *
+ * Enter the guest on an ISAv3.0 or later system where we have exactly
+ * one vcpu per vcore, and both the host and guest are radix, and threads
+ * are set to "independent mode".
+ */
+.balign IFETCH_ALIGN_BYTES
+_GLOBAL(kvmppc_p9_enter_guest)
+EXPORT_SYMBOL_GPL(kvmppc_p9_enter_guest)
+ mflr r0
+ std r0,PPC_LR_STKOFF(r1)
+ stdu r1,-SFS(r1)
+
+ std r1,HSTATE_HOST_R1(r13)
+ std r3,STACK_SLOT_VCPU(r1)
+
+ mfcr r4
+ stw r4,SFS+8(r1)
+
+ reg = 14
+ .rept 18
+ std reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ ld r4,VCPU_LR(r3)
+ mtlr r4
+ ld r4,VCPU_CTR(r3)
+ mtctr r4
+ ld r4,VCPU_XER(r3)
+ mtspr SPRN_XER,r4
+
+ ld r1,VCPU_CR(r3)
+
+BEGIN_FTR_SECTION
+ ld r4,VCPU_CFAR(r3)
+ mtspr SPRN_CFAR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
+BEGIN_FTR_SECTION
+ ld r4,VCPU_PPR(r3)
+ mtspr SPRN_PPR,r4
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+ reg = 4
+ .rept 28
+ ld reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ ld r4,VCPU_KVM(r3)
+ lbz r4,KVM_SECURE_GUEST(r4)
+ cmpdi r4,0
+ ld r4,VCPU_GPR(R4)(r3)
+ bne .Lret_to_ultra
+
+ mtcr r1
+
+ ld r0,VCPU_GPR(R0)(r3)
+ ld r1,VCPU_GPR(R1)(r3)
+ ld r2,VCPU_GPR(R2)(r3)
+ ld r3,VCPU_GPR(R3)(r3)
+
+ HRFI_TO_GUEST
+ b .
+
+ /*
+ * Use UV_RETURN ultracall to return control back to the Ultravisor
+ * after processing a hypercall or interrupt that was forwarded
+ * (a.k.a. reflected) to the Hypervisor.
+ *
+ * All registers have already been reloaded except the ucall requires:
+ * R0 = hcall result
+ * R2 = SRR1, so UV can detect a synthesized interrupt (if any)
+ * R3 = UV_RETURN
+ */
+.Lret_to_ultra:
+ mtcr r1
+ ld r1,VCPU_GPR(R1)(r3)
+
+ ld r0,VCPU_GPR(R3)(r3)
+ mfspr r2,SPRN_SRR1
+ LOAD_REG_IMMEDIATE(r3, UV_RETURN)
+ sc 2
+
+/*
+ * kvmppc_p9_exit_hcall and kvmppc_p9_exit_interrupt are branched to from
+ * above if the interrupt was taken for a guest that was entered via
+ * kvmppc_p9_enter_guest().
+ *
+ * This code recovers the host stack and vcpu pointer, saves all GPRs and
+ * CR, LR, CTR, XER as well as guest MSR and NIA into the VCPU, then re-
+ * establishes the host stack and registers to return from the
+ * kvmppc_p9_enter_guest() function.
+ */
+.balign IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_hcall:
+ mfspr r11,SPRN_SRR0
+ mfspr r12,SPRN_SRR1
+ li r10,0xc00
+ std r10,HSTATE_SCRATCH0(r13)
+
+.balign IFETCH_ALIGN_BYTES
+kvmppc_p9_exit_interrupt:
+ std r1,HSTATE_SCRATCH1(r13)
+ std r3,HSTATE_SCRATCH2(r13)
+ ld r1,HSTATE_HOST_R1(r13)
+ ld r3,STACK_SLOT_VCPU(r1)
+
+ std r9,VCPU_CR(r3)
+
+1:
+ std r11,VCPU_PC(r3)
+ std r12,VCPU_MSR(r3)
+
+ reg = 14
+ .rept 18
+ std reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ /* r1, r3, r9-r13 are saved to vcpu by C code */
+ std r0,VCPU_GPR(R0)(r3)
+ std r2,VCPU_GPR(R2)(r3)
+ reg = 4
+ .rept 5
+ std reg,__VCPU_GPR(reg)(r3)
+ reg = reg + 1
+ .endr
+
+ ld r2,PACATOC(r13)
+
+ mflr r4
+ std r4,VCPU_LR(r3)
+ mfspr r4,SPRN_XER
+ std r4,VCPU_XER(r3)
+
+ reg = 14
+ .rept 18
+ ld reg,STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
+ reg = reg + 1
+ .endr
+
+ lwz r4,SFS+8(r1)
+ mtcr r4
+
+ /*
+ * Flush the link stack here, before executing the first blr on the
+ * way out of the guest.
+ *
+ * The link stack won't match coming out of the guest anyway so the
+ * only cost is the flush itself. The call clobbers r0.
+ */
+1: nop
+ patch_site 1b patch__call_kvm_flush_link_stack_2
+
+ addi r1,r1,SFS
+ ld r0,PPC_LR_STKOFF(r1)
+ mtlr r0
+ blr
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1997cf347d3e..28a2761515e3 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1421,6 +1421,8 @@ static int kvmppc_handle_exit_hv(struct kvm_vcpu *vcpu,
*/
case BOOK3S_INTERRUPT_H_DATA_STORAGE:
r = RESUME_PAGE_FAULT;
+ if (vcpu->arch.fault_dsisr == HDSISR_CANARY)
+ r = RESUME_GUEST; /* Just retry if it's the canary */
break;
case BOOK3S_INTERRUPT_H_INST_STORAGE:
vcpu->arch.fault_dar = kvmppc_get_pc(vcpu);
@@ -3736,14 +3738,14 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
mtspr(SPRN_PSSCR_PR, host_psscr);
-
/* H_CEDE has to be handled now, not later */
- if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
+ if (trap == BOOK3S_INTERRUPT_SYSCALL &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
kvmppc_cede(vcpu);
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
+
} else {
kvmppc_xive_push_vcpu(vcpu);
trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
@@ -3768,9 +3770,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
}
}
kvmppc_xive_pull_vcpu(vcpu);
+
+ vcpu->arch.slb_max = 0;
}
- vcpu->arch.slb_max = 0;
dec = mfspr(SPRN_DEC);
if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
dec = (s32) dec;
@@ -4429,11 +4432,19 @@ static int kvmppc_vcpu_run_hv(struct kvm_vcpu *vcpu)
else
r = kvmppc_run_vcpu(vcpu);
- if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
- !(vcpu->arch.shregs.msr & MSR_PR)) {
- trace_kvm_hcall_enter(vcpu);
- r = kvmppc_pseries_do_hcall(vcpu);
- trace_kvm_hcall_exit(vcpu, r);
+ if (run->exit_reason == KVM_EXIT_PAPR_HCALL) {
+ if (unlikely(vcpu->arch.shregs.msr & MSR_PR)) {
+ /*
+ * Guest userspace executed sc 1, reflect it
+ * back as a privileged program check interrupt.
+ */
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ r = RESUME_GUEST;
+ } else {
+ trace_kvm_hcall_enter(vcpu);
+ r = kvmppc_pseries_do_hcall(vcpu);
+ trace_kvm_hcall_exit(vcpu, r);
+ }
kvmppc_core_prepare_to_enter(vcpu);
} else if (r == RESUME_PAGE_FAULT) {
srcu_idx = srcu_read_lock(&kvm->srcu);
diff --git a/arch/powerpc/kvm/book3s_hv_interrupt.c b/arch/powerpc/kvm/book3s_hv_interrupt.c
new file mode 100644
index 000000000000..5a7b036c447f
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_interrupt.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <asm/asm-prototypes.h>
+#include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+
+#ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
+static void __start_timing(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ u64 tb = mftb() - vc->tb_offset_applied;
+
+ vcpu->arch.cur_activity = next;
+ vcpu->arch.cur_tb_start = tb;
+}
+
+static void __accumulate_time(struct kvm_vcpu *vcpu, struct kvmhv_tb_accumulator *next)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ struct kvmhv_tb_accumulator *curr;
+ u64 tb = mftb() - vc->tb_offset_applied;
+ u64 prev_tb;
+ u64 delta;
+ u64 seq;
+
+ curr = vcpu->arch.cur_activity;
+ vcpu->arch.cur_activity = next;
+ prev_tb = vcpu->arch.cur_tb_start;
+ vcpu->arch.cur_tb_start = tb;
+
+ if (!curr)
+ return;
+
+ delta = tb - prev_tb;
+
+ seq = curr->seqcount;
+ curr->seqcount = seq + 1;
+ smp_wmb();
+ curr->tb_total += delta;
+ if (seq == 0 || delta < curr->tb_min)
+ curr->tb_min = delta;
+ if (delta > curr->tb_max)
+ curr->tb_max = delta;
+ smp_wmb();
+ curr->seqcount = seq + 2;
+}
+
+#define start_timing(vcpu, next) __start_timing(vcpu, next)
+#define end_timing(vcpu) __start_timing(vcpu, NULL)
+#define accumulate_time(vcpu, next) __accumulate_time(vcpu, next)
+#else
+#define start_timing(vcpu, next) do {} while (0)
+#define end_timing(vcpu) do {} while (0)
+#define accumulate_time(vcpu, next) do {} while (0)
+#endif
+
+static inline void mfslb(unsigned int idx, u64 *slbee, u64 *slbev)
+{
+ asm volatile("slbmfev %0,%1" : "=r" (*slbev) : "r" (idx));
+ asm volatile("slbmfee %0,%1" : "=r" (*slbee) : "r" (idx));
+}
+
+static inline void mtslb(unsigned int idx, u64 slbee, u64 slbev)
+{
+ BUG_ON((slbee & 0xfff) != idx);
+
+ asm volatile("slbmte %0,%1" :: "r" (slbev), "r" (slbee));
+}
+
+static inline void slb_invalidate(unsigned int ih)
+{
+ asm volatile("slbia %0" :: "i"(ih));
+}
+
+/*
+ * Malicious or buggy radix guests may have inserted SLB entries
+ * (only 0..3 because radix always runs with UPRT=1), so these must
+ * be cleared here to avoid side-channels. slbmte is used rather
+ * than slbia, as it won't clear cached translations.
+ */
+static void radix_clear_slb(void)
+{
+ u64 slbee, slbev;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ mfslb(i, &slbee, &slbev);
+ if (unlikely(slbee || slbev)) {
+ slbee = i;
+ slbev = 0;
+ mtslb(i, slbee, slbev);
+ }
+ }
+}
+
+int __kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu)
+{
+ u64 *exsave;
+ unsigned long msr = mfmsr();
+ int trap;
+
+ start_timing(vcpu, &vcpu->arch.rm_entry);
+
+ vcpu->arch.ceded = 0;
+
+ WARN_ON_ONCE(vcpu->arch.shregs.msr & MSR_HV);
+ WARN_ON_ONCE(!(vcpu->arch.shregs.msr & MSR_ME));
+
+ mtspr(SPRN_HSRR0, vcpu->arch.regs.nip);
+ mtspr(SPRN_HSRR1, (vcpu->arch.shregs.msr & ~MSR_HV) | MSR_ME);
+
+ /*
+ * On POWER9 DD2.1 and below, sometimes on a Hypervisor Data Storage
+ * Interrupt (HDSI) the HDSISR is not updated at all.
+ *
+ * To work around this we put a canary value into the HDSISR before
+ * returning to a guest and then check for this canary when we take a
+ * HDSI. If we find the canary on a HDSI, we know the hardware didn't
+ * update the HDSISR. In this case we return to the guest to retake the
+ * HDSI, which should correctly update the HDSISR on the second HDSI
+ * entry.
+ *
+ * Just do this on all p9 processors for now.
+ */
+ mtspr(SPRN_HDSISR, HDSISR_CANARY);
+
+ accumulate_time(vcpu, &vcpu->arch.guest_time);
+
+ local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_GUEST_HV_FAST;
+ kvmppc_p9_enter_guest(vcpu);
+ // Radix host and guest means host never runs with guest MMU state
+ local_paca->kvm_hstate.in_guest = KVM_GUEST_MODE_NONE;
+
+ accumulate_time(vcpu, &vcpu->arch.rm_intr);
+
+ /* Get these from r11/12 and paca exsave */
+ vcpu->arch.shregs.srr0 = mfspr(SPRN_SRR0);
+ vcpu->arch.shregs.srr1 = mfspr(SPRN_SRR1);
+ vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
+ vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+
+ trap = local_paca->kvm_hstate.scratch0 & ~0x2;
+ if (likely(trap > BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+ exsave = local_paca->exgen;
+ } else if (trap == BOOK3S_INTERRUPT_SYSTEM_RESET) {
+ exsave = local_paca->exnmi;
+ } else { /* trap == 0x200 */
+ exsave = local_paca->exmc;
+ }
+
+ vcpu->arch.regs.gpr[1] = local_paca->kvm_hstate.scratch1;
+ vcpu->arch.regs.gpr[3] = local_paca->kvm_hstate.scratch2;
+ vcpu->arch.regs.gpr[9] = exsave[EX_R9/sizeof(u64)];
+ vcpu->arch.regs.gpr[10] = exsave[EX_R10/sizeof(u64)];
+ vcpu->arch.regs.gpr[11] = exsave[EX_R11/sizeof(u64)];
+ vcpu->arch.regs.gpr[12] = exsave[EX_R12/sizeof(u64)];
+ vcpu->arch.regs.gpr[13] = exsave[EX_R13/sizeof(u64)];
+ vcpu->arch.ppr = exsave[EX_PPR/sizeof(u64)];
+ vcpu->arch.cfar = exsave[EX_CFAR/sizeof(u64)];
+ vcpu->arch.regs.ctr = exsave[EX_CTR/sizeof(u64)];
+
+ vcpu->arch.last_inst = KVM_INST_FETCH_FAILED;
+
+ if (unlikely(trap == BOOK3S_INTERRUPT_MACHINE_CHECK)) {
+ vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+ vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+ kvmppc_realmode_machine_check(vcpu);
+
+ } else if (unlikely(trap == BOOK3S_INTERRUPT_HMI)) {
+ kvmppc_realmode_hmi_handler();
+
+ } else if (trap == BOOK3S_INTERRUPT_H_EMUL_ASSIST) {
+ vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_DATA_STORAGE) {
+ vcpu->arch.fault_dar = exsave[EX_DAR/sizeof(u64)];
+ vcpu->arch.fault_dsisr = exsave[EX_DSISR/sizeof(u64)];
+ vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_INST_STORAGE) {
+ vcpu->arch.fault_gpa = mfspr(SPRN_ASDR);
+
+ } else if (trap == BOOK3S_INTERRUPT_H_FAC_UNAVAIL) {
+ vcpu->arch.hfscr = mfspr(SPRN_HFSCR);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+ /*
+ * Softpatch interrupt for transactional memory emulation cases
+ * on POWER9 DD2.2. This is early in the guest exit path - we
+ * haven't saved registers or done a treclaim yet.
+ */
+ } else if (trap == BOOK3S_INTERRUPT_HV_SOFTPATCH) {
+ vcpu->arch.emul_inst = mfspr(SPRN_HEIR);
+
+ /*
+ * The cases we want to handle here are those where the guest
+ * is in real suspend mode and is trying to transition to
+ * transactional mode.
+ */
+ if (local_paca->kvm_hstate.fake_suspend &&
+ (vcpu->arch.shregs.msr & MSR_TS_S)) {
+ if (kvmhv_p9_tm_emulation_early(vcpu)) {
+ /* Prevent it being handled again. */
+ trap = 0;
+ }
+ }
+#endif
+ }
+
+ radix_clear_slb();
+
+ __mtmsrd(msr, 0);
+
+ accumulate_time(vcpu, &vcpu->arch.rm_exit);
+
+ end_timing(vcpu);
+
+ return trap;
+}
+EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2d0d14ed1d92..6118e8a97ddd 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -44,9 +44,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define NAPPING_UNSPLIT 3
/* Stack frame offsets for kvmppc_hv_entry */
-#define SFS 208
+#define SFS 160
#define STACK_SLOT_TRAP (SFS-4)
-#define STACK_SLOT_SHORT_PATH (SFS-8)
#define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24)
#define STACK_SLOT_PID (SFS-32)
@@ -59,8 +58,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define STACK_SLOT_UAMOR (SFS-88)
#define STACK_SLOT_DAWR1 (SFS-96)
#define STACK_SLOT_DAWRX1 (SFS-104)
-/* the following is used by the P9 short path */
-#define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */
/*
* Call kvmppc_hv_entry in real mode.
@@ -1008,9 +1005,6 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
no_xive:
#endif /* CONFIG_KVM_XICS */
- li r0, 0
- stw r0, STACK_SLOT_SHORT_PATH(r1)
-
deliver_guest_interrupt: /* r4 = vcpu, r13 = paca */
/* Check if we can deliver an external or decrementer interrupt now */
ld r0, VCPU_PENDING_EXC(r4)
@@ -1030,7 +1024,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_SRR0, r6
mtspr SPRN_SRR1, r7
-fast_guest_entry_c:
ld r10, VCPU_PC(r4)
ld r11, VCPU_MSR(r4)
/* r11 = vcpu->arch.msr & ~MSR_HV */
@@ -1135,97 +1128,6 @@ ret_to_ultra:
ld r4, VCPU_GPR(R4)(r4)
sc 2
-/*
- * Enter the guest on a P9 or later system where we have exactly
- * one vcpu per vcore and we don't need to go to real mode
- * (which implies that host and guest are both using radix MMU mode).
- * r3 = vcpu pointer
- * Most SPRs and all the VSRs have been loaded already.
- */
-_GLOBAL(__kvmhv_vcpu_entry_p9)
-EXPORT_SYMBOL_GPL(__kvmhv_vcpu_entry_p9)
- mflr r0
- std r0, PPC_LR_STKOFF(r1)
- stdu r1, -SFS(r1)
-
- li r0, 1
- stw r0, STACK_SLOT_SHORT_PATH(r1)
-
- std r3, HSTATE_KVM_VCPU(r13)
- mfcr r4
- stw r4, SFS+8(r1)
-
- std r1, HSTATE_HOST_R1(r13)
-
- reg = 14
- .rept 18
- std reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
- reg = reg + 1
- .endr
-
- reg = 14
- .rept 18
- ld reg, __VCPU_GPR(reg)(r3)
- reg = reg + 1
- .endr
-
- mfmsr r10
- std r10, HSTATE_HOST_MSR(r13)
-
- mr r4, r3
- b fast_guest_entry_c
-guest_exit_short_path:
- /*
- * Malicious or buggy radix guests may have inserted SLB entries
- * (only 0..3 because radix always runs with UPRT=1), so these must
- * be cleared here to avoid side-channels. slbmte is used rather
- * than slbia, as it won't clear cached translations.
- */
- li r0,0
- slbmte r0,r0
- li r4,1
- slbmte r0,r4
- li r4,2
- slbmte r0,r4
- li r4,3
- slbmte r0,r4
-
- li r0, KVM_GUEST_MODE_NONE
- stb r0, HSTATE_IN_GUEST(r13)
-
- reg = 14
- .rept 18
- std reg, __VCPU_GPR(reg)(r9)
- reg = reg + 1
- .endr
-
- reg = 14
- .rept 18
- ld reg, STACK_SLOT_NVGPRS + ((reg - 14) * 8)(r1)
- reg = reg + 1
- .endr
-
- lwz r4, SFS+8(r1)
- mtcr r4
-
- mr r3, r12 /* trap number */
-
- addi r1, r1, SFS
- ld r0, PPC_LR_STKOFF(r1)
- mtlr r0
-
- /* If we are in real mode, do a rfid to get back to the caller */
- mfmsr r4
- andi. r5, r4, MSR_IR
- bnelr
- rldicl r5, r4, 64 - MSR_TS_S_LG, 62 /* extract TS field */
- mtspr SPRN_SRR0, r0
- ld r10, HSTATE_HOST_MSR(r13)
- rldimi r10, r5, MSR_TS_S_LG, 63 - MSR_TS_T_LG
- mtspr SPRN_SRR1, r10
- RFI_TO_KERNEL
- b .
-
secondary_too_late:
li r12, 0
stw r12, STACK_SLOT_TRAP(r1)
@@ -1397,14 +1299,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mr r4,r9
bge fast_guest_return
2:
- /* If we came in through the P9 short path, no real mode hcalls */
- lwz r0, STACK_SLOT_SHORT_PATH(r1)
- cmpwi r0, 0
- bne no_try_real
/* See if this is an hcall we can handle in real mode */
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
beq hcall_try_real_mode
-no_try_real:
/* Hypervisor doorbell - exit only if host IPI flag set */
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
@@ -1447,11 +1344,6 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
bl kvmhv_accumulate_time
#endif
#ifdef CONFIG_KVM_XICS
- /* If we came in through the P9 short path, xive pull is done in C */
- lwz r0, STACK_SLOT_SHORT_PATH(r1)
- cmpwi r0, 0
- bne 1f
-
/* We are exiting, pull the VP from the XIVE */
lbz r0, VCPU_XIVE_PUSHED(r9)
cmpwi cr0, r0, 0
@@ -1496,11 +1388,6 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1: nop
patch_site 1b patch__call_kvm_flush_link_stack
- /* If we came in through the P9 short path, go back out to C now */
- lwz r0, STACK_SLOT_SHORT_PATH(r1)
- cmpwi r0, 0
- bne guest_exit_short_path
-
/* For hash guest, read the guest SLB and save it away */
ld r5, VCPU_KVM(r9)
lbz r0, KVM_RADIX(r5)
@@ -1548,8 +1435,10 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
b guest_bypass
0: /*
- * Sanitise radix guest SLB, see guest_exit_short_path comment.
- * We clear vcpu->arch.slb_max to match earlier behaviour.
+ * Malicious or buggy radix guests may have inserted SLB entries
+ * (only 0..3 because radix always runs with UPRT=1), so these must
+ * be cleared here to avoid side-channels. slbmte is used rather
+ * than slbia, as it won't clear cached translations.
*/
li r0,0
stw r0,VCPU_SLB_MAX(r9)
@@ -3362,7 +3251,7 @@ BEGIN_FTR_SECTION
mtspr SPRN_DAWRX1, r0
END_FTR_SECTION_IFSET(CPU_FTR_DAWR1)
- /* Clear hash and radix guest SLB, see guest_exit_short_path comment. */
+ /* Clear hash and radix guest SLB. */
slbmte r0, r0
PPC_SLBIA(6)
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index d2266d36a7c7..cb03ed147b42 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -161,6 +161,40 @@ void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
+void kvmppc_xive_cede_vcpu(struct kvm_vcpu *vcpu)
+{
+ void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
+
+ if (!esc_vaddr)
+ return;
+
+ /* we are using XIVE with single escalation */
+
+ if (vcpu->arch.xive_esc_on) {
+ /*
+ * If we still have a pending escalation, abort the cede,
+ * and we must set PQ to 10 rather than 00 so that we don't
+ * potentially end up with two entries for the escalation
+ * interrupt in the XIVE interrupt queue. In that case
+ * we also don't want to set xive_esc_on to 1 here in
+ * case we race with xive_esc_irq().
+ */
+ vcpu->arch.ceded = 0;
+ /*
+ * The escalation interrupts are special as we don't EOI them.
+ * There is no need to use the load-after-store ordering offset
+ * to set PQ to 10 as we won't use StoreEOI.
+ */
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
+ } else {
+ vcpu->arch.xive_esc_on = true;
+ mb();
+ __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
+ }
+ mb();
+}
+EXPORT_SYMBOL_GPL(kvmppc_xive_cede_vcpu);
+
/*
* This is a simple trigger for a generic XIVE IRQ. This must
* only be called for interrupts that support a trigger page
--
2.23.0
^ permalink raw reply related
* [PATCH v2 22/37] KVM: PPC: Book3S HV P9: Reduce irq_work vs guest decrementer races
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
irq_work's use of the DEC SPR is racy with the guest<->host switch: guest
entry flips the DEC interrupt over to the guest, so a host irq_work
interrupt could be lost.
This patch closes one race, and attempts to comment several others.
(XXX: should think a bit harder about this)
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/paca.h | 1 +
arch/powerpc/kvm/book3s_hv.c | 15 ++++++++++++++-
2 files changed, 15 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index ec18ac818e3a..23c12048fbc9 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -174,6 +174,7 @@ struct paca_struct {
u8 irq_happened; /* irq happened while soft-disabled */
u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* Could have irq_work_using_hdec here, but what about nested HV entry modifying DEC? Could have a pointer to the hv struct time limit */
u8 pmcregs_in_use; /* pseries puts this in lppaca */
#endif
u64 sprg_vdso; /* Saved user-visible sprg */
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d98958b78830..1997cf347d3e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3689,6 +3689,18 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
+ /*
+ * XXX: must always deal with irq_work_raise via NMI vs setting DEC.
+ * The problem occurs right as we switch into guest mode: if an NMI
+ * hits, sets pending work and sets DEC, that DEC value will apply to
+ * the guest and will not bring us back to the host.
+ *
+ * irq_work_raise could check a flag (or possibly LPCR[HDICE] for
+ * example) and set HDEC to 1? That wouldn't solve the nested hv
+ * case which needs to abort the hcall or zero the time limit.
+ *
+ * Another day's problem.
+ */
mtspr(SPRN_DEC, vcpu->arch.dec_expires - tb);
if (kvmhv_on_pseries()) {
@@ -3822,7 +3834,8 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
- mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - tb);
+ set_dec_or_work(local_paca->kvm_hstate.dec_expires - tb);
+
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
kvmhv_load_host_pmu();
--
2.23.0
^ permalink raw reply related
* [PATCH v2 21/37] powerpc: add set_dec_or_work API for safely updating decrementer
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
Decrementer updates must always check for new irq work to avoid an
irq work decrementer interrupt being lost.
Add an API for this in the timer code so callers don't have to care
about details.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/time.h | 9 +++++++++
arch/powerpc/kernel/time.c | 20 +++++++++++---------
2 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 0128cd9769bc..d62bde57bf02 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -78,6 +78,15 @@ static inline void set_dec(u64 val)
mtspr(SPRN_DEC, val - 1);
}
+#ifdef CONFIG_IRQ_WORK
+void set_dec_or_work(u64 val);
+#else
+static inline void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+}
+#endif
+
static inline unsigned long tb_ticks_since(unsigned long tstamp)
{
return mftb() - tstamp;
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index b67d93a609a2..e35156858e6e 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -561,6 +561,15 @@ void arch_irq_work_raise(void)
preempt_enable();
}
+void set_dec_or_work(u64 val)
+{
+ set_dec(val);
+ /* We may have raced with new irq work */
+ if (unlikely(test_irq_work_pending()))
+ set_dec(1);
+}
+EXPORT_SYMBOL_GPL(set_dec_or_work);
+
#else /* CONFIG_IRQ_WORK */
#define test_irq_work_pending() 0
@@ -628,10 +637,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt)
} else {
now = *next_tb - now;
if (now <= decrementer_max)
- set_dec(now);
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ set_dec_or_work(now);
__this_cpu_inc(irq_stat.timer_irqs_others);
}
@@ -873,11 +879,7 @@ static int decrementer_set_next_event(unsigned long evt,
struct clock_event_device *dev)
{
__this_cpu_write(decrementers_next_tb, get_tb() + evt);
- set_dec(evt);
-
- /* We may have raced with new irq work */
- if (test_irq_work_pending())
- set_dec(1);
+ set_dec_or_work(evt);
return 0;
}
--
2.23.0
^ permalink raw reply related
* [PATCH v2 20/37] KVM: PPC: Book3S HV P9: Reduce mftb per guest entry/exit
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
mftb is serialising (dispatch next-to-complete), so it is heavyweight
for an mfspr. Avoid reading it multiple times in the entry or exit paths.
A delay of a small number of cycles to timers is tolerable.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 735ec40ece86..d98958b78830 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3689,7 +3689,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
if (!(vcpu->arch.ctrl & 1))
mtspr(SPRN_CTRLT, mfspr(SPRN_CTRLF) & ~1);
- mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
+ mtspr(SPRN_DEC, vcpu->arch.dec_expires - tb);
if (kvmhv_on_pseries()) {
/*
@@ -3822,7 +3822,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
vc->entry_exit_map = 0x101;
vc->in_guest = 0;
- mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - mftb());
+ mtspr(SPRN_DEC, local_paca->kvm_hstate.dec_expires - tb);
mtspr(SPRN_SPRG_VDSO_WRITE, local_paca->sprg_vdso);
kvmhv_load_host_pmu();
--
2.23.0
^ permalink raw reply related
* [PATCH v2 19/37] KVM: PPC: Book3S HV P9: Use host timer accounting to avoid decrementer read
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
There is no need to save away the host DEC value, as it is derived
from the host timer subsystem, which maintains the next timer time.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/time.h | 5 +++++
arch/powerpc/kvm/book3s_hv.c | 12 ++++++------
2 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 68d94711811e..0128cd9769bc 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -101,6 +101,11 @@ extern void __init time_init(void);
DECLARE_PER_CPU(u64, decrementers_next_tb);
+static inline u64 timer_get_next_tb(void)
+{
+ return __this_cpu_read(decrementers_next_tb);
+}
+
/* Convert timebase ticks to nanoseconds */
unsigned long long tb_to_ns(unsigned long long tb_ticks);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 913582bd848f..735ec40ece86 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3631,16 +3631,16 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
unsigned long host_amr = mfspr(SPRN_AMR);
unsigned long host_fscr = mfspr(SPRN_FSCR);
s64 dec;
- u64 tb;
+ u64 tb, next_timer;
int trap, save_pmu;
- dec = mfspr(SPRN_DEC);
tb = mftb();
- if (dec < 0)
+ next_timer = timer_get_next_tb();
+ if (tb >= next_timer)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
- local_paca->kvm_hstate.dec_expires = dec + tb;
- if (local_paca->kvm_hstate.dec_expires < time_limit)
- time_limit = local_paca->kvm_hstate.dec_expires;
+ local_paca->kvm_hstate.dec_expires = next_timer;
+ if (next_timer < time_limit)
+ time_limit = next_timer;
vcpu->arch.ceded = 0;
--
2.23.0
^ permalink raw reply related
* [PATCH v2 18/37] KVM: PPC: Book3S HV P9: Use large decrementer for HDEC
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
On processors that don't suppress HDEC exceptions when LPCR[HDICE]=0,
setting HDEC to decrementer_max on guest exit could help reduce needless
guest exits caused by leftover exceptions on entering the guest.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/time.h | 2 ++
arch/powerpc/kvm/book3s_hv.c | 3 ++-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index 8dd3cdb25338..68d94711811e 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -18,6 +18,8 @@
#include <asm/vdso/timebase.h>
/* time.c */
+extern u64 decrementer_max;
+
extern unsigned long tb_ticks_per_jiffy;
extern unsigned long tb_ticks_per_usec;
extern unsigned long tb_ticks_per_sec;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 63cc92c45c5d..913582bd848f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3609,7 +3609,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
vc->tb_offset_applied = 0;
}
- mtspr(SPRN_HDEC, 0x7fffffff);
+ /* HDEC must be at least as large as DEC, so decrementer_max fits */
+ mtspr(SPRN_HDEC, decrementer_max);
switch_mmu_to_host_radix(kvm, host_pidr);
--
2.23.0
^ permalink raw reply related
* [PATCH v2 17/37] KVM: PPC: Book3S HV P9: Move setting HDEC after switching to guest LPCR
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
LPCR[HDICE]=0 suppresses hypervisor decrementer exceptions on some
processors, so HDICE must be set before HDEC is written.
Rather than set it in the host LPCR then setting HDEC, move the HDEC
update to after the guest MMU context (including LPCR) is loaded.
There shouldn't be much concern with delaying HDEC by some 10s or 100s
of nanoseconds by setting it a bit later.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 24 ++++++++++--------------
1 file changed, 10 insertions(+), 14 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d4770b222d7e..63cc92c45c5d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3490,23 +3490,13 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
host_dawrx1 = mfspr(SPRN_DAWRX1);
}
- /*
- * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0,
- * so set HDICE before writing HDEC.
- */
- mtspr(SPRN_LPCR, kvm->arch.host_lpcr | LPCR_HDICE);
- isync();
-
- hdec = time_limit - mftb();
- if (hdec < 0) {
- mtspr(SPRN_LPCR, kvm->arch.host_lpcr);
- isync();
+ tb = mftb();
+ hdec = time_limit - tb;
+ if (hdec < 0)
return BOOK3S_INTERRUPT_HV_DECREMENTER;
- }
- mtspr(SPRN_HDEC, hdec);
if (vc->tb_offset) {
- u64 new_tb = mftb() + vc->tb_offset;
+ u64 new_tb = tb + vc->tb_offset;
mtspr(SPRN_TBU40, new_tb);
tb = mftb();
if ((tb & 0xffffff) < (new_tb & 0xffffff))
@@ -3549,6 +3539,12 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
+ /*
+ * P9 suppresses the HDEC exception when LPCR[HDICE] = 0,
+ * so set guest LPCR (with HDICE) before writing HDEC.
+ */
+ mtspr(SPRN_HDEC, hdec);
+
mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
--
2.23.0
^ permalink raw reply related
* [PATCH v2 16/37] KVM: PPC: Book3S HV P9: Stop handling hcalls in real-mode in the P9 path
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
In the interest of minimising the amount of code that is run in
"real-mode", don't handle hcalls in real mode in the P9 path.
POWER8 and earlier are much more expensive to exit from HV real mode
and switch to host mode, because on those processors HV interrupts get
to the hypervisor with the MMU off, and the other threads in the core
need to be pulled out of the guest, and SLBs all need to be saved,
ERATs invalidated, and host SLB reloaded before the MMU is re-enabled
in host mode. Hash guests also require a lot of hcalls to run. The
XICS interrupt controller requires hcalls to run.
By contrast, POWER9 has independent thread switching, and in radix mode
the hypervisor is already in a host virtual memory mode when the HV
interrupt is taken. Radix + xive guests don't need hcalls to handle
interrupts or manage translations.
So it's much less important to handle hcalls in real mode in P9.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/include/asm/kvm_ppc.h | 5 +++++
arch/powerpc/kvm/book3s_hv.c | 25 ++++++++++++++++++++++---
arch/powerpc/kvm/book3s_hv_rmhandlers.S | 5 +++++
arch/powerpc/kvm/book3s_xive.c | 25 +++++++++++++++++++++++++
4 files changed, 57 insertions(+), 3 deletions(-)
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 73b1ca5a6471..db6646c2ade2 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -607,6 +607,7 @@ extern void kvmppc_free_pimap(struct kvm *kvm);
extern int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall);
extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd);
+extern int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req);
extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu);
extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval);
extern int kvmppc_xics_connect_vcpu(struct kvm_device *dev,
@@ -639,6 +640,8 @@ static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu)
static inline void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
{ return 0; }
+static inline int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+ { return 0; }
#endif
#ifdef CONFIG_KVM_XIVE
@@ -673,6 +676,7 @@ extern int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq,
int level, bool line_status);
extern void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu);
extern void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu);
+extern void kvmppc_xive_cede_vcpu(struct kvm_vcpu *vcpu);
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{
@@ -714,6 +718,7 @@ static inline int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 ir
int level, bool line_status) { return -ENODEV; }
static inline void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu) { }
static inline void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu) { }
+static inline void kvmppc_xive_cede_vcpu(struct kvm_vcpu *vcpu) { }
static inline int kvmppc_xive_enabled(struct kvm_vcpu *vcpu)
{ return 0; }
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 7e23838b7f9b..d4770b222d7e 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1144,7 +1144,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
* This has to be done early, not in kvmppc_pseries_do_hcall(), so
* that the cede logic in kvmppc_run_single_vcpu() works properly.
*/
-static void kvmppc_nested_cede(struct kvm_vcpu *vcpu)
+static void kvmppc_cede(struct kvm_vcpu *vcpu)
{
vcpu->arch.shregs.msr |= MSR_EE;
vcpu->arch.ceded = 1;
@@ -3731,15 +3731,34 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
- kvmppc_nested_cede(vcpu);
+ kvmppc_cede(vcpu);
kvmppc_set_gpr(vcpu, 3, 0);
trap = 0;
}
} else {
kvmppc_xive_push_vcpu(vcpu);
trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
- kvmppc_xive_pull_vcpu(vcpu);
+ /* H_CEDE has to be handled now, not later */
+ /* XICS hcalls must be handled before xive is pulled */
+ if (trap == BOOK3S_INTERRUPT_SYSCALL &&
+ !(vcpu->arch.shregs.msr & MSR_PR)) {
+ unsigned long req = kvmppc_get_gpr(vcpu, 3);
+ if (req == H_CEDE) {
+ kvmppc_cede(vcpu);
+ kvmppc_xive_cede_vcpu(vcpu); /* may un-cede */
+ kvmppc_set_gpr(vcpu, 3, 0);
+ trap = 0;
+ }
+ if (req == H_EOI || req == H_CPPR || req == H_IPI ||
+ req == H_IPOLL || req == H_XIRR || req == H_XIRR_X) {
+ unsigned long ret;
+ ret = kvmppc_xive_xics_hcall(vcpu, req);
+ kvmppc_set_gpr(vcpu, 3, ret);
+ trap = 0;
+ }
+ }
+ kvmppc_xive_pull_vcpu(vcpu);
}
vcpu->arch.slb_max = 0;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index c11597f815e4..2d0d14ed1d92 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1397,9 +1397,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
mr r4,r9
bge fast_guest_return
2:
+ /* If we came in through the P9 short path, no real mode hcalls */
+ lwz r0, STACK_SLOT_SHORT_PATH(r1)
+ cmpwi r0, 0
+ bne no_try_real
/* See if this is an hcall we can handle in real mode */
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
beq hcall_try_real_mode
+no_try_real:
/* Hypervisor doorbell - exit only if host IPI flag set */
cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 8632fb998a55..d2266d36a7c7 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -2109,6 +2109,31 @@ static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
return 0;
}
+int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+
+ switch (req) {
+ case H_XIRR:
+ return xive_vm_h_xirr(vcpu);
+ case H_CPPR:
+ return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_EOI:
+ return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_IPI:
+ return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+ kvmppc_get_gpr(vcpu, 5));
+ case H_IPOLL:
+ return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
+ case H_XIRR_X:
+ xive_vm_h_xirr(vcpu);
+ kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset);
+ return H_SUCCESS;
+ }
+
+ return H_UNSUPPORTED;
+}
+
int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
--
2.23.0
^ permalink raw reply related
* [PATCH v2 15/37] KVM: PPC: Book3S HV P9: Move xive vcpu context management into kvmhv_p9_guest_entry
From: Nicholas Piggin @ 2021-02-25 13:46 UTC (permalink / raw)
To: kvm-ppc; +Cc: linuxppc-dev, Nicholas Piggin
In-Reply-To: <20210225134652.2127648-1-npiggin@gmail.com>
Move the xive management up so the low level register switching can be
pushed further down in a later patch. XIVE MMIO CI operations can run in
higher level code with machine checks, tracing, etc., available.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
arch/powerpc/kvm/book3s_hv.c | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e3344d58537d..7e23838b7f9b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3549,15 +3549,11 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit,
switch_mmu_to_guest_radix(kvm, vcpu, lpcr);
- kvmppc_xive_push_vcpu(vcpu);
-
mtspr(SPRN_SRR0, vcpu->arch.shregs.srr0);
mtspr(SPRN_SRR1, vcpu->arch.shregs.srr1);
trap = __kvmhv_vcpu_entry_p9(vcpu);
- kvmppc_xive_pull_vcpu(vcpu);
-
/* Advance host PURR/SPURR by the amount used by guest */
purr = mfspr(SPRN_PURR);
spurr = mfspr(SPRN_SPURR);
@@ -3740,7 +3736,10 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
trap = 0;
}
} else {
+ kvmppc_xive_push_vcpu(vcpu);
trap = kvmhv_load_hv_regs_and_go(vcpu, time_limit, lpcr);
+ kvmppc_xive_pull_vcpu(vcpu);
+
}
vcpu->arch.slb_max = 0;
--
2.23.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox