* [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits
@ 2015-07-16 21:29 Mario Smarduch
2015-07-16 21:29 ` [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore " Mario Smarduch
` (3 more replies)
0 siblings, 4 replies; 12+ messages in thread
From: Mario Smarduch @ 2015-07-16 21:29 UTC (permalink / raw)
To: linux-arm-kernel
Currently we save/restore fp/simd on each exit. The first patch optimizes arm64
save/restore so that we only do so on Guest access. hackbench and several lmbench
tests show that anywhere from 30% to 50% of exits don't context switch the
vfp/simd registers.
For the second patch, the 32-bit handler is updated to keep exit handling
consistent with the 64-bit code.
Changes since v3:
- Per Christoffer's comment - changed comment for skip fp/simd in patch 1/2
- Changed cover text, clarify optimization in the context of this patch
Changes since v2:
- Only for patch 2/2
- Removed load_vcpu in switch_to_guest_vfp per Marc's comment
- Got another chance to replace an unreferenced label with a comment
Changes since v1:
- only for patch 2/2
- Reworked trapping to vfp access handler
Changes since initial version:
- Addressed Marc's comments
- Verified optimization improvements with lmbench and hackbench, updated
commit message
Mario Smarduch (2):
Optimize arm64 skip 30-50% vfp/simd save/restore on exits
keep arm vfp/simd exit handling consistent with arm64
arch/arm/kvm/interrupts.S | 14 +++++++------
arch/arm64/include/asm/kvm_arm.h | 5 ++++-
arch/arm64/kvm/hyp.S | 45 +++++++++++++++++++++++++++++++++++++---
3 files changed, 54 insertions(+), 10 deletions(-)
--
1.9.1
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-07-16 21:29 [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Mario Smarduch
@ 2015-07-16 21:29 ` Mario Smarduch
2015-08-05 16:11 ` Marc Zyngier
2015-07-16 21:29 ` [PATCH v4 2/2] arm: KVM: keep arm vfp/simd exit handling consistent with arm64 Mario Smarduch
` (2 subsequent siblings)
3 siblings, 1 reply; 12+ messages in thread
From: Mario Smarduch @ 2015-07-16 21:29 UTC (permalink / raw)
To: linux-arm-kernel
This patch only saves and restores FP/SIMD registers on Guest access. To do
this, the cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
lmbench and hackbench show significant improvements: for 30-50% of exits the
FP/SIMD context is not saved/restored.
Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
arch/arm64/include/asm/kvm_arm.h | 5 ++++-
arch/arm64/kvm/hyp.S | 45 +++++++++++++++++++++++++++++++++++++---
2 files changed, 46 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index ac6fafb..7605e09 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -171,10 +171,13 @@
#define HSTR_EL2_TTEE (1 << 16)
#define HSTR_EL2_T(x) (1 << x)
+/* Hyp Coprocessor Trap Register Shifts */
+#define CPTR_EL2_TFP_SHIFT 10
+
/* Hyp Coprocessor Trap Register */
#define CPTR_EL2_TCPAC (1 << 31)
#define CPTR_EL2_TTA (1 << 20)
-#define CPTR_EL2_TFP (1 << 10)
+#define CPTR_EL2_TFP (1 << CPTR_EL2_TFP_SHIFT)
/* Hyp Debug Configuration Register bits */
#define MDCR_EL2_TDRA (1 << 11)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 5befd01..e708d5a 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -673,6 +673,14 @@
tbz \tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
.endm
+/*
+ * Branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping enabled)
+ */
+.macro skip_fpsimd_state tmp, target
+ mrs \tmp, cptr_el2
+ tbnz \tmp, #CPTR_EL2_TFP_SHIFT, \target
+.endm
+
.macro compute_debug_state target
// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
// is set, we do a full save/restore cycle and disable trapping.
@@ -763,6 +771,7 @@
ldr x2, [x0, #VCPU_HCR_EL2]
msr hcr_el2, x2
mov x2, #CPTR_EL2_TTA
+ orr x2, x2, #CPTR_EL2_TFP
msr cptr_el2, x2
mov x2, #(1 << 15) // Trap CP15 Cr=15
@@ -785,7 +794,6 @@
.macro deactivate_traps
mov x2, #HCR_RW
msr hcr_el2, x2
- msr cptr_el2, xzr
msr hstr_el2, xzr
mrs x2, mdcr_el2
@@ -912,6 +920,28 @@ __restore_fpsimd:
restore_fpsimd
ret
+switch_to_guest_fpsimd:
+ push x4, lr
+
+ mrs x2, cptr_el2
+ bic x2, x2, #CPTR_EL2_TFP
+ msr cptr_el2, x2
+
+ mrs x0, tpidr_el2
+
+ ldr x2, [x0, #VCPU_HOST_CONTEXT]
+ kern_hyp_va x2
+ bl __save_fpsimd
+
+ add x2, x0, #VCPU_CONTEXT
+ bl __restore_fpsimd
+
+ pop x4, lr
+ pop x2, x3
+ pop x0, x1
+
+ eret
+
/*
* u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
*
@@ -932,7 +962,6 @@ ENTRY(__kvm_vcpu_run)
kern_hyp_va x2
save_host_regs
- bl __save_fpsimd
bl __save_sysregs
compute_debug_state 1f
@@ -948,7 +977,6 @@ ENTRY(__kvm_vcpu_run)
add x2, x0, #VCPU_CONTEXT
bl __restore_sysregs
- bl __restore_fpsimd
skip_debug_state x3, 1f
bl __restore_debug
@@ -967,7 +995,9 @@ __kvm_vcpu_return:
add x2, x0, #VCPU_CONTEXT
save_guest_regs
+ skip_fpsimd_state x3, 1f
bl __save_fpsimd
+1:
bl __save_sysregs
skip_debug_state x3, 1f
@@ -986,7 +1016,11 @@ __kvm_vcpu_return:
kern_hyp_va x2
bl __restore_sysregs
+ skip_fpsimd_state x3, 1f
bl __restore_fpsimd
+1:
+ /* Clear FPSIMD and Trace trapping */
+ msr cptr_el2, xzr
skip_debug_state x3, 1f
// Clear the dirty flag for the next run, as all the state has
@@ -1201,6 +1235,11 @@ el1_trap:
* x1: ESR
* x2: ESR_EC
*/
+
+ /* Guest accessed VFP/SIMD registers, save host, restore Guest */
+ cmp x2, #ESR_ELx_EC_FP_ASIMD
+ b.eq switch_to_guest_fpsimd
+
cmp x2, #ESR_ELx_EC_DABT_LOW
mov x0, #ESR_ELx_EC_IABT_LOW
ccmp x2, x0, #4, ne
--
1.9.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 2/2] arm: KVM: keep arm vfp/simd exit handling consistent with arm64
2015-07-16 21:29 [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Mario Smarduch
2015-07-16 21:29 ` [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore " Mario Smarduch
@ 2015-07-16 21:29 ` Mario Smarduch
2015-07-17 9:28 ` [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Christoffer Dall
2015-07-17 10:28 ` Marc Zyngier
3 siblings, 0 replies; 12+ messages in thread
From: Mario Smarduch @ 2015-07-16 21:29 UTC (permalink / raw)
To: linux-arm-kernel
After enhancing arm64 FP/SIMD exit handling, the ARMv7 VFP exit branch is moved
to guest trap handling. This allows us to keep the exit handling flow consistent
between both architectures.
Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
arch/arm/kvm/interrupts.S | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 79caf79..b245b4e 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -363,10 +363,6 @@ hyp_hvc:
@ Check syndrome register
mrc p15, 4, r1, c5, c2, 0 @ HSR
lsr r0, r1, #HSR_EC_SHIFT
-#ifdef CONFIG_VFPv3
- cmp r0, #HSR_EC_CP_0_13
- beq switch_to_guest_vfp
-#endif
cmp r0, #HSR_EC_HVC
bne guest_trap @ Not HVC instr.
@@ -380,7 +376,10 @@ hyp_hvc:
cmp r2, #0
bne guest_trap @ Guest called HVC
-host_switch_to_hyp:
+ /*
+ * Getting here means host called HVC, we shift parameters and branch
+ * to Hyp function.
+ */
pop {r0, r1, r2}
/* Check for __hyp_get_vectors */
@@ -411,6 +410,10 @@ guest_trap:
@ Check if we need the fault information
lsr r1, r1, #HSR_EC_SHIFT
+#ifdef CONFIG_VFPv3
+ cmp r1, #HSR_EC_CP_0_13
+ beq switch_to_guest_vfp
+#endif
cmp r1, #HSR_EC_IABT
mrceq p15, 4, r2, c6, c0, 2 @ HIFAR
beq 2f
@@ -479,7 +482,6 @@ guest_trap:
*/
#ifdef CONFIG_VFPv3
switch_to_guest_vfp:
- load_vcpu @ Load VCPU pointer to r0
push {r3-r7}
@ NEON/VFP used. Turn on VFP access.
--
1.9.1
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits
2015-07-16 21:29 [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Mario Smarduch
2015-07-16 21:29 ` [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore " Mario Smarduch
2015-07-16 21:29 ` [PATCH v4 2/2] arm: KVM: keep arm vfp/simd exit handling consistent with arm64 Mario Smarduch
@ 2015-07-17 9:28 ` Christoffer Dall
2015-07-17 10:28 ` Marc Zyngier
3 siblings, 0 replies; 12+ messages in thread
From: Christoffer Dall @ 2015-07-17 9:28 UTC (permalink / raw)
To: linux-arm-kernel
On Thu, Jul 16, 2015 at 02:29:36PM -0700, Mario Smarduch wrote:
> Currently we save/restore fp/simd on each exit. The first patch optimizes arm64
> save/restore, we only do so on Guest access. hackbench and several lmbench
> tests show anywhere from 30% to 50% of exits don't context switch the vfp/simd
> registers.
>
> For second patch 32-bit handler is updated to keep exit handling consistent
> with 64-bit code.
>
For the series:
Reviewed-by: Christoffer Dall <christoffer.dall@linaro.org>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits
2015-07-16 21:29 [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Mario Smarduch
` (2 preceding siblings ...)
2015-07-17 9:28 ` [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Christoffer Dall
@ 2015-07-17 10:28 ` Marc Zyngier
3 siblings, 0 replies; 12+ messages in thread
From: Marc Zyngier @ 2015-07-17 10:28 UTC (permalink / raw)
To: linux-arm-kernel
On 16/07/15 22:29, Mario Smarduch wrote:
> Currently we save/restore fp/simd on each exit. The first patch optimizes arm64
> save/restore, we only do so on Guest access. hackbench and several lmbench
> tests show anywhere from 30% to 50% of exits don't context switch the vfp/simd
> registers.
>
> For second patch 32-bit handler is updated to keep exit handling consistent
> with 64-bit code.
This looks pretty good, I'll take these patches in for 4.3.
Thanks,
M.
--
Jazz is not dead. It just smells funny...
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-07-16 21:29 ` [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore " Mario Smarduch
@ 2015-08-05 16:11 ` Marc Zyngier
2015-08-06 11:54 ` Christoffer Dall
2015-08-19 17:49 ` Christoffer Dall
0 siblings, 2 replies; 12+ messages in thread
From: Marc Zyngier @ 2015-08-05 16:11 UTC (permalink / raw)
To: linux-arm-kernel
On 16/07/15 22:29, Mario Smarduch wrote:
> This patch only saves and restores FP/SIMD registers on Guest access. To do
> this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
> lmbench, hackbench show significant improvements, for 30-50% exits FP/SIMD
> context is not saved/restored
>
> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
So this patch seems to break 32bit guests on arm64. I've had a look,
squashed a few bugs that I dangerously overlooked during the review, but
it still doesn't work (it doesn't crash anymore, but I get random
illegal VFP instructions in 32bit guests).
I'd be glad if someone could eyeball the following patch and tell me
what's going wrong. If we don't find the root cause quickly enough, I'll
have to drop the series from -next, and that'd be a real shame.
Thanks,
M.
commit 5777dc55fbc170426a85e00c26002dd5a795cfa5
Author: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed Aug 5 16:53:01 2015 +0100
KVM: arm64: NOTAFIX: Prevent crash when 32bit guest uses VFP
Since we switch FPSIMD in a lazy way, access to FPEXC32_EL2
must be guarded by skip_fpsimd_state. Otherwise, all hell
breaks loose.
Also, FPEXC32_EL2 must be restored when we trap to EL2 to
enable floating point.
Note that while it prevents the host from catching fire, the
guest still doesn't work properly, and I don't understand why just
yet.
Not-really-signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index c8e0c70..b53ec5d 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -431,10 +431,12 @@
add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
mrs x4, dacr32_el2
mrs x5, ifsr32_el2
- mrs x6, fpexc32_el2
stp x4, x5, [x3]
- str x6, [x3, #16]
+ skip_fpsimd_state x8, 3f
+ mrs x6, fpexc32_el2
+ str x6, [x3, #16]
+3:
skip_debug_state x8, 2f
mrs x7, dbgvcr32_el2
str x7, [x3, #24]
@@ -461,10 +463,8 @@
add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
ldp x4, x5, [x3]
- ldr x6, [x3, #16]
msr dacr32_el2, x4
msr ifsr32_el2, x5
- msr fpexc32_el2, x6
skip_debug_state x8, 2f
ldr x7, [x3, #24]
@@ -669,12 +669,14 @@ __restore_debug:
ret
__save_fpsimd:
+ skip_fpsimd_state x3, 1f
save_fpsimd
- ret
+1: ret
__restore_fpsimd:
+ skip_fpsimd_state x3, 1f
restore_fpsimd
- ret
+1: ret
switch_to_guest_fpsimd:
push x4, lr
@@ -682,6 +684,7 @@ switch_to_guest_fpsimd:
mrs x2, cptr_el2
bic x2, x2, #CPTR_EL2_TFP
msr cptr_el2, x2
+ isb
mrs x0, tpidr_el2
@@ -692,6 +695,10 @@ switch_to_guest_fpsimd:
add x2, x0, #VCPU_CONTEXT
bl __restore_fpsimd
+ skip_32bit_state x3, 1f
+ ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
+ msr fpexc32_el2, x4
+1:
pop x4, lr
pop x2, x3
pop x0, x1
@@ -754,9 +761,7 @@ __kvm_vcpu_return:
add x2, x0, #VCPU_CONTEXT
save_guest_regs
- skip_fpsimd_state x3, 1f
bl __save_fpsimd
-1:
bl __save_sysregs
skip_debug_state x3, 1f
@@ -777,9 +782,7 @@ __kvm_vcpu_return:
kern_hyp_va x2
bl __restore_sysregs
- skip_fpsimd_state x3, 1f
bl __restore_fpsimd
-1:
/* Clear FPSIMD and Trace trapping */
msr cptr_el2, xzr
--
Jazz is not dead. It just smells funny...
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-08-05 16:11 ` Marc Zyngier
@ 2015-08-06 11:54 ` Christoffer Dall
2015-08-06 12:46 ` Marc Zyngier
2015-08-19 17:49 ` Christoffer Dall
1 sibling, 1 reply; 12+ messages in thread
From: Christoffer Dall @ 2015-08-06 11:54 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, Aug 05, 2015 at 05:11:37PM +0100, Marc Zyngier wrote:
> On 16/07/15 22:29, Mario Smarduch wrote:
> > This patch only saves and restores FP/SIMD registers on Guest access. To do
> > this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
> > lmbench, hackbench show significant improvements, for 30-50% exits FP/SIMD
> > context is not saved/restored
> >
> > Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
>
> So this patch seems to break 32bit guests on arm64. I've had a look,
> squashed a few bugs that I dangerously overlooked during the review, but
> it still doesn't work (it doesn't crash anymore, but I get random
> illegal VFP instructions in 32bit guests).
>
> I'd be glad if someone could eyeball the following patch and tell me
> what's going wrong. If we don't find the root cause quickly enough, I'll
> have to drop the series from -next, and that'd be a real shame.
>
> Thanks,
>
> M.
>
> commit 5777dc55fbc170426a85e00c26002dd5a795cfa5
> Author: Marc Zyngier <marc.zyngier@arm.com>
> Date: Wed Aug 5 16:53:01 2015 +0100
>
> KVM: arm64: NOTAFIX: Prevent crash when 32bit guest uses VFP
>
> Since we switch FPSIMD in a lazy way, access to FPEXC32_EL2
> must be guarded by skip_fpsimd_state. Otherwise, all hell
> break loose.
>
> Also, FPEXC32_EL2 must be restored when we trap to EL2 to
> enable floating point.
>
> Note that while it prevents the host from catching fire, the
> guest still doesn't work properly, and I don't understand why just
> yet.
>
> Not-really-signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index c8e0c70..b53ec5d 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -431,10 +431,12 @@
> add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
> mrs x4, dacr32_el2
> mrs x5, ifsr32_el2
> - mrs x6, fpexc32_el2
> stp x4, x5, [x3]
> - str x6, [x3, #16]
>
> + skip_fpsimd_state x8, 3f
> + mrs x6, fpexc32_el2
> + str x6, [x3, #16]
> +3:
> skip_debug_state x8, 2f
> mrs x7, dbgvcr32_el2
> str x7, [x3, #24]
> @@ -461,10 +463,8 @@
>
> add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
> ldp x4, x5, [x3]
> - ldr x6, [x3, #16]
> msr dacr32_el2, x4
> msr ifsr32_el2, x5
> - msr fpexc32_el2, x6
>
> skip_debug_state x8, 2f
> ldr x7, [x3, #24]
> @@ -669,12 +669,14 @@ __restore_debug:
> ret
>
> __save_fpsimd:
> + skip_fpsimd_state x3, 1f
> save_fpsimd
> - ret
> +1: ret
>
> __restore_fpsimd:
> + skip_fpsimd_state x3, 1f
> restore_fpsimd
> - ret
> +1: ret
>
> switch_to_guest_fpsimd:
> push x4, lr
> @@ -682,6 +684,7 @@ switch_to_guest_fpsimd:
> mrs x2, cptr_el2
> bic x2, x2, #CPTR_EL2_TFP
> msr cptr_el2, x2
> + isb
ah, EL2 accesses themselves trap too, ouch.
>
> mrs x0, tpidr_el2
>
> @@ -692,6 +695,10 @@ switch_to_guest_fpsimd:
> add x2, x0, #VCPU_CONTEXT
> bl __restore_fpsimd
>
> + skip_32bit_state x3, 1f
> + ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
> + msr fpexc32_el2, x4
> +1:
wait, it looks like you're missing a store of the host fpsimd state in
the switch_to_guest_fpsimd situation.
I think this would be easier to follow if the aarch32 FP registers were
handled as part of the __save_fpsimd and __restore_fpsimd routines
instead.
Does this help anything?
Otherwise, I assume you've tested thoroughly between something like
v4.2-rc2 and this patch, so that you're sure we didn't have the 32-bit
process crash before?
Thanks,
-Christoffer
> pop x4, lr
> pop x2, x3
> pop x0, x1
> @@ -754,9 +761,7 @@ __kvm_vcpu_return:
> add x2, x0, #VCPU_CONTEXT
>
> save_guest_regs
> - skip_fpsimd_state x3, 1f
> bl __save_fpsimd
> -1:
> bl __save_sysregs
>
> skip_debug_state x3, 1f
> @@ -777,9 +782,7 @@ __kvm_vcpu_return:
> kern_hyp_va x2
>
> bl __restore_sysregs
> - skip_fpsimd_state x3, 1f
> bl __restore_fpsimd
> -1:
> /* Clear FPSIMD and Trace trapping */
> msr cptr_el2, xzr
>
>
> --
> Jazz is not dead. It just smells funny...
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-08-06 11:54 ` Christoffer Dall
@ 2015-08-06 12:46 ` Marc Zyngier
0 siblings, 0 replies; 12+ messages in thread
From: Marc Zyngier @ 2015-08-06 12:46 UTC (permalink / raw)
To: linux-arm-kernel
Hi Christoffer,
On 06/08/15 12:54, Christoffer Dall wrote:
> On Wed, Aug 05, 2015 at 05:11:37PM +0100, Marc Zyngier wrote:
>> On 16/07/15 22:29, Mario Smarduch wrote:
>>> This patch only saves and restores FP/SIMD registers on Guest access. To do
>>> this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
>>> lmbench, hackbench show significant improvements, for 30-50% exits FP/SIMD
>>> context is not saved/restored
>>>
>>> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
>>
>> So this patch seems to break 32bit guests on arm64. I've had a look,
>> squashed a few bugs that I dangerously overlooked during the review, but
>> it still doesn't work (it doesn't crash anymore, but I get random
>> illegal VFP instructions in 32bit guests).
>>
>> I'd be glad if someone could eyeball the following patch and tell me
>> what's going wrong. If we don't find the root cause quickly enough, I'll
>> have to drop the series from -next, and that'd be a real shame.
>>
>> Thanks,
>>
>> M.
>>
>> commit 5777dc55fbc170426a85e00c26002dd5a795cfa5
>> Author: Marc Zyngier <marc.zyngier@arm.com>
>> Date: Wed Aug 5 16:53:01 2015 +0100
>>
>> KVM: arm64: NOTAFIX: Prevent crash when 32bit guest uses VFP
>>
>> Since we switch FPSIMD in a lazy way, access to FPEXC32_EL2
>> must be guarded by skip_fpsimd_state. Otherwise, all hell
>> break loose.
>>
>> Also, FPEXC32_EL2 must be restored when we trap to EL2 to
>> enable floating point.
>>
>> Note that while it prevents the host from catching fire, the
>> guest still doesn't work properly, and I don't understand why just
>> yet.
>>
>> Not-really-signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>
>> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
>> index c8e0c70..b53ec5d 100644
>> --- a/arch/arm64/kvm/hyp.S
>> +++ b/arch/arm64/kvm/hyp.S
>> @@ -431,10 +431,12 @@
>> add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
>> mrs x4, dacr32_el2
>> mrs x5, ifsr32_el2
>> - mrs x6, fpexc32_el2
>> stp x4, x5, [x3]
>> - str x6, [x3, #16]
>>
>> + skip_fpsimd_state x8, 3f
>> + mrs x6, fpexc32_el2
>> + str x6, [x3, #16]
>> +3:
>> skip_debug_state x8, 2f
>> mrs x7, dbgvcr32_el2
>> str x7, [x3, #24]
>> @@ -461,10 +463,8 @@
>>
>> add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
>> ldp x4, x5, [x3]
>> - ldr x6, [x3, #16]
>> msr dacr32_el2, x4
>> msr ifsr32_el2, x5
>> - msr fpexc32_el2, x6
>>
>> skip_debug_state x8, 2f
>> ldr x7, [x3, #24]
>> @@ -669,12 +669,14 @@ __restore_debug:
>> ret
>>
>> __save_fpsimd:
>> + skip_fpsimd_state x3, 1f
>> save_fpsimd
>> - ret
>> +1: ret
>>
>> __restore_fpsimd:
>> + skip_fpsimd_state x3, 1f
>> restore_fpsimd
>> - ret
>> +1: ret
>>
>> switch_to_guest_fpsimd:
>> push x4, lr
>> @@ -682,6 +684,7 @@ switch_to_guest_fpsimd:
>> mrs x2, cptr_el2
>> bic x2, x2, #CPTR_EL2_TFP
>> msr cptr_el2, x2
>> + isb
>
> ah, EL2 accesses themselves trap too, ouch.
Yeah, the FP architecture has all kinds of nasty tricks like this.
>>
>> mrs x0, tpidr_el2
>>
>> @@ -692,6 +695,10 @@ switch_to_guest_fpsimd:
>> add x2, x0, #VCPU_CONTEXT
>> bl __restore_fpsimd
>>
>> + skip_32bit_state x3, 1f
>> + ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
>> + msr fpexc32_el2, x4
>> +1:
>
> wait, it looks like you're missing a store of the host fpsimd state in
> the switch_to_guest_fpsimd situation.
A store of FPEXC32_EL2 for the host? No, this register has no
significance whatsoever for a 64bit host. Its sole purpose is to
accommodate a 32bit guest (see D7.2.31). Or are you thinking of
something else?
> It think this would be easier to follow if the aarch32 FP registers were
> handled as part of the __save_fpsimd and __restore_fpsimd routines
> instead.
Maybe. This code needs some rework...
> Does this help anything?
Not really, sorry... :-(
> Otherwise, I assume you've tested thoroughly between something like
> v4.2-rc2 and this patch, so that you're sure we didn't have the 32-bit
> processed crash before?
Without the lazy switching, we seem to be rock solid. With lazy
switching, I get these illegal instructions, always on VFP ops. My
current thinking is that it has something to do with CPACR_EL1, and the
fact that the 32bit kernel turns VFP on/off all the time.
/me puzzled...
M.
--
Jazz is not dead. It just smells funny...
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-08-05 16:11 ` Marc Zyngier
2015-08-06 11:54 ` Christoffer Dall
@ 2015-08-19 17:49 ` Christoffer Dall
2015-08-19 21:52 ` Mario Smarduch
1 sibling, 1 reply; 12+ messages in thread
From: Christoffer Dall @ 2015-08-19 17:49 UTC (permalink / raw)
To: linux-arm-kernel
Hi Mario,
On Wed, Aug 05, 2015 at 05:11:37PM +0100, Marc Zyngier wrote:
> On 16/07/15 22:29, Mario Smarduch wrote:
> > This patch only saves and restores FP/SIMD registers on Guest access. To do
> > this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
> > lmbench, hackbench show significant improvements, for 30-50% exits FP/SIMD
> > context is not saved/restored
> >
> > Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
>
> So this patch seems to break 32bit guests on arm64. I've had a look,
> squashed a few bugs that I dangerously overlooked during the review, but
> it still doesn't work (it doesn't crash anymore, but I get random
> illegal VFP instructions in 32bit guests).
>
> I'd be glad if someone could eyeball the following patch and tell me
> what's going wrong. If we don't find the root cause quickly enough, I'll
> have to drop the series from -next, and that'd be a real shame.
>
> Thanks,
>
> M.
>
> commit 5777dc55fbc170426a85e00c26002dd5a795cfa5
> Author: Marc Zyngier <marc.zyngier@arm.com>
> Date: Wed Aug 5 16:53:01 2015 +0100
>
> KVM: arm64: NOTAFIX: Prevent crash when 32bit guest uses VFP
>
> Since we switch FPSIMD in a lazy way, access to FPEXC32_EL2
> must be guarded by skip_fpsimd_state. Otherwise, all hell
> break loose.
>
> Also, FPEXC32_EL2 must be restored when we trap to EL2 to
> enable floating point.
>
> Note that while it prevents the host from catching fire, the
> guest still doesn't work properly, and I don't understand why just
> yet.
>
> Not-really-signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index c8e0c70..b53ec5d 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -431,10 +431,12 @@
> add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
> mrs x4, dacr32_el2
> mrs x5, ifsr32_el2
> - mrs x6, fpexc32_el2
> stp x4, x5, [x3]
> - str x6, [x3, #16]
>
> + skip_fpsimd_state x8, 3f
> + mrs x6, fpexc32_el2
> + str x6, [x3, #16]
> +3:
> skip_debug_state x8, 2f
> mrs x7, dbgvcr32_el2
> str x7, [x3, #24]
> @@ -461,10 +463,8 @@
>
> add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
> ldp x4, x5, [x3]
> - ldr x6, [x3, #16]
> msr dacr32_el2, x4
> msr ifsr32_el2, x5
> - msr fpexc32_el2, x6
>
> skip_debug_state x8, 2f
> ldr x7, [x3, #24]
> @@ -669,12 +669,14 @@ __restore_debug:
> ret
>
> __save_fpsimd:
> + skip_fpsimd_state x3, 1f
> save_fpsimd
> - ret
> +1: ret
>
> __restore_fpsimd:
> + skip_fpsimd_state x3, 1f
> restore_fpsimd
> - ret
> +1: ret
>
> switch_to_guest_fpsimd:
> push x4, lr
> @@ -682,6 +684,7 @@ switch_to_guest_fpsimd:
> mrs x2, cptr_el2
> bic x2, x2, #CPTR_EL2_TFP
> msr cptr_el2, x2
> + isb
>
> mrs x0, tpidr_el2
>
> @@ -692,6 +695,10 @@ switch_to_guest_fpsimd:
> add x2, x0, #VCPU_CONTEXT
> bl __restore_fpsimd
>
> + skip_32bit_state x3, 1f
> + ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
> + msr fpexc32_el2, x4
> +1:
> pop x4, lr
> pop x2, x3
> pop x0, x1
> @@ -754,9 +761,7 @@ __kvm_vcpu_return:
> add x2, x0, #VCPU_CONTEXT
>
> save_guest_regs
> - skip_fpsimd_state x3, 1f
> bl __save_fpsimd
> -1:
> bl __save_sysregs
>
> skip_debug_state x3, 1f
> @@ -777,9 +782,7 @@ __kvm_vcpu_return:
> kern_hyp_va x2
>
> bl __restore_sysregs
> - skip_fpsimd_state x3, 1f
> bl __restore_fpsimd
> -1:
> /* Clear FPSIMD and Trace trapping */
> msr cptr_el2, xzr
>
>
Marc and I have hunted down the problem at KVM Forum and we believe we've
found the root cause. Please have a look at the following follow-up patch to
Marc's patch above:
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 8b2a73b4..842e727 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -769,11 +769,26 @@
.macro activate_traps
ldr x2, [x0, #VCPU_HCR_EL2]
+
+ /*
+ * We are about to set CPTR_EL2.TFP to trap all floating point
+ * register accesses to EL2, however, the ARM ARM clearly states that
+ * traps are only taken to EL2 if the operation would not otherwise
+ * trap to EL1. Therefore, always make sure that for 32-bit guests,
+ * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
+ */
+ tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state
+ mov x3, #(1 << 30)
+ msr fpexc32_el2, x3
+ isb
+99:
+
msr hcr_el2, x2
mov x2, #CPTR_EL2_TTA
orr x2, x2, #CPTR_EL2_TFP
msr cptr_el2, x2
+
mov x2, #(1 << 15) // Trap CP15 Cr=15
msr hstr_el2, x2
Thanks,
-Christoffer
^ permalink raw reply related [flat|nested] 12+ messages in thread
* [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-08-19 17:49 ` Christoffer Dall
@ 2015-08-19 21:52 ` Mario Smarduch
2015-08-19 22:28 ` Marc Zyngier
0 siblings, 1 reply; 12+ messages in thread
From: Mario Smarduch @ 2015-08-19 21:52 UTC (permalink / raw)
To: linux-arm-kernel
Hi Christoffer,
I'll test it and work with it.
Thanks,
Mario
On 8/19/2015 10:49 AM, Christoffer Dall wrote:
> Hi Mario,
>
> On Wed, Aug 05, 2015 at 05:11:37PM +0100, Marc Zyngier wrote:
>> On 16/07/15 22:29, Mario Smarduch wrote:
>>> This patch only saves and restores FP/SIMD registers on Guest access. To do
>>> this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
>>> lmbench, hackbench show significant improvements, for 30-50% exits FP/SIMD
>>> context is not saved/restored
>>>
>>> Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
>>
>> So this patch seems to break 32bit guests on arm64. I've had a look,
>> squashed a few bugs that I dangerously overlooked during the review, but
>> it still doesn't work (it doesn't crash anymore, but I get random
>> illegal VFP instructions in 32bit guests).
>>
>> I'd be glad if someone could eyeball the following patch and tell me
>> what's going wrong. If we don't find the root cause quickly enough, I'll
>> have to drop the series from -next, and that'd be a real shame.
>>
>> Thanks,
>>
>> M.
>>
>> commit 5777dc55fbc170426a85e00c26002dd5a795cfa5
>> Author: Marc Zyngier <marc.zyngier@arm.com>
>> Date: Wed Aug 5 16:53:01 2015 +0100
>>
>> KVM: arm64: NOTAFIX: Prevent crash when 32bit guest uses VFP
>>
>> Since we switch FPSIMD in a lazy way, access to FPEXC32_EL2
>> must be guarded by skip_fpsimd_state. Otherwise, all hell
>> break loose.
>>
>> Also, FPEXC32_EL2 must be restored when we trap to EL2 to
>> enable floating point.
>>
>> Note that while it prevents the host from catching fire, the
>> guest still doesn't work properly, and I don't understand why just
>> yet.
>>
>> Not-really-signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
>>
>> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
>> index c8e0c70..b53ec5d 100644
>> --- a/arch/arm64/kvm/hyp.S
>> +++ b/arch/arm64/kvm/hyp.S
>> @@ -431,10 +431,12 @@
>> add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
>> mrs x4, dacr32_el2
>> mrs x5, ifsr32_el2
>> - mrs x6, fpexc32_el2
>> stp x4, x5, [x3]
>> - str x6, [x3, #16]
>>
>> + skip_fpsimd_state x8, 3f
>> + mrs x6, fpexc32_el2
>> + str x6, [x3, #16]
>> +3:
>> skip_debug_state x8, 2f
>> mrs x7, dbgvcr32_el2
>> str x7, [x3, #24]
>> @@ -461,10 +463,8 @@
>>
>> add x3, x2, #CPU_SYSREG_OFFSET(DACR32_EL2)
>> ldp x4, x5, [x3]
>> - ldr x6, [x3, #16]
>> msr dacr32_el2, x4
>> msr ifsr32_el2, x5
>> - msr fpexc32_el2, x6
>>
>> skip_debug_state x8, 2f
>> ldr x7, [x3, #24]
>> @@ -669,12 +669,14 @@ __restore_debug:
>> ret
>>
>> __save_fpsimd:
>> + skip_fpsimd_state x3, 1f
>> save_fpsimd
>> - ret
>> +1: ret
>>
>> __restore_fpsimd:
>> + skip_fpsimd_state x3, 1f
>> restore_fpsimd
>> - ret
>> +1: ret
>>
>> switch_to_guest_fpsimd:
>> push x4, lr
>> @@ -682,6 +684,7 @@ switch_to_guest_fpsimd:
>> mrs x2, cptr_el2
>> bic x2, x2, #CPTR_EL2_TFP
>> msr cptr_el2, x2
>> + isb
>>
>> mrs x0, tpidr_el2
>>
>> @@ -692,6 +695,10 @@ switch_to_guest_fpsimd:
>> add x2, x0, #VCPU_CONTEXT
>> bl __restore_fpsimd
>>
>> + skip_32bit_state x3, 1f
>> + ldr x4, [x2, #CPU_SYSREG_OFFSET(FPEXC32_EL2)]
>> + msr fpexc32_el2, x4
>> +1:
>> pop x4, lr
>> pop x2, x3
>> pop x0, x1
>> @@ -754,9 +761,7 @@ __kvm_vcpu_return:
>> add x2, x0, #VCPU_CONTEXT
>>
>> save_guest_regs
>> - skip_fpsimd_state x3, 1f
>> bl __save_fpsimd
>> -1:
>> bl __save_sysregs
>>
>> skip_debug_state x3, 1f
>> @@ -777,9 +782,7 @@ __kvm_vcpu_return:
>> kern_hyp_va x2
>>
>> bl __restore_sysregs
>> - skip_fpsimd_state x3, 1f
>> bl __restore_fpsimd
>> -1:
>> /* Clear FPSIMD and Trace trapping */
>> msr cptr_el2, xzr
>>
>>
>
> Marc and I have hunted down the issue at KVM Forum and we believe we've
> found the issue. Please have a look at the following follow-up patch to
> Marc's patch above:
>
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index 8b2a73b4..842e727 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -769,11 +769,26 @@
>
> .macro activate_traps
> ldr x2, [x0, #VCPU_HCR_EL2]
> +
> + /*
> + * We are about to set CPTR_EL2.TFP to trap all floating point
> + * register accesses to EL2, however, the ARM ARM clearly states that
> + * traps are only taken to EL2 if the operation would not otherwise
> + * trap to EL1. Therefore, always make sure that for 32-bit guests,
> + * we set FPEXC.EN to prevent traps to EL1, when setting the TFP bit.
> + */
> + tbnz x2, #HCR_RW_SHIFT, 99f // open code skip_32bit_state
> + mov x3, #(1 << 30)
> + msr fpexc32_el2, x3
> + isb
> +99:
> +
> msr hcr_el2, x2
> mov x2, #CPTR_EL2_TTA
> orr x2, x2, #CPTR_EL2_TFP
> msr cptr_el2, x2
>
> +
> mov x2, #(1 << 15) // Trap CP15 Cr=15
> msr hstr_el2, x2
>
>
>
> Thanks,
> -Christoffer
>
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-08-19 21:52 ` Mario Smarduch
@ 2015-08-19 22:28 ` Marc Zyngier
2015-08-19 23:21 ` Mario Smarduch
0 siblings, 1 reply; 12+ messages in thread
From: Marc Zyngier @ 2015-08-19 22:28 UTC (permalink / raw)
To: linux-arm-kernel
On Wed, 19 Aug 2015 14:52:08 -0700
Mario Smarduch <m.smarduch@samsung.com> wrote:
> Hi Christoffer,
> I'll test it and work with it.
FWIW, I've added these patches to both -queue and -next, and from the
tests Christoffer has run, it looks pretty good.
Thanks,
M.
--
Jazz is not dead. It just smells funny.
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore on exits
2015-08-19 22:28 ` Marc Zyngier
@ 2015-08-19 23:21 ` Mario Smarduch
0 siblings, 0 replies; 12+ messages in thread
From: Mario Smarduch @ 2015-08-19 23:21 UTC (permalink / raw)
To: linux-arm-kernel
Great, that's even better.
On 8/19/2015 3:28 PM, Marc Zyngier wrote:
> On Wed, 19 Aug 2015 14:52:08 -0700
> Mario Smarduch <m.smarduch@samsung.com> wrote:
>
>> Hi Christoffer,
>> I'll test it and work with it.
>
> FWIW, I've added these patches to both -queue and -next, and from the
> tests Christoffer has run, it looks pretty good.
>
> Thanks,
>
> M.
>
^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2015-08-19 23:21 UTC | newest]
Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-16 21:29 [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Mario Smarduch
2015-07-16 21:29 ` [PATCH v4 1/2] arm64: KVM: Optimize arm64 skip 30-50% vfp/simd save/restore " Mario Smarduch
2015-08-05 16:11 ` Marc Zyngier
2015-08-06 11:54 ` Christoffer Dall
2015-08-06 12:46 ` Marc Zyngier
2015-08-19 17:49 ` Christoffer Dall
2015-08-19 21:52 ` Mario Smarduch
2015-08-19 22:28 ` Marc Zyngier
2015-08-19 23:21 ` Mario Smarduch
2015-07-16 21:29 ` [PATCH v4 2/2] arm: KVM: keep arm vfp/simd exit handling consistent with arm64 Mario Smarduch
2015-07-17 9:28 ` [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits Christoffer Dall
2015-07-17 10:28 ` Marc Zyngier
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).