linux-arm-kernel.lists.infradead.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE
@ 2015-07-11  1:19 Mario Smarduch
  2015-07-11  1:19 ` [PATCH v4 1/2] arm64: KVM: Optimize arm64 non-VHE fpsimd skip 30-50% save/restore on exits Mario Smarduch
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Mario Smarduch @ 2015-07-11  1:19 UTC (permalink / raw)
  To: linux-arm-kernel

This is a follow-up to the previous iteration, but implemented on top of the VHE patches.
Only the non-VHE path is addressed by this patch. In the second patch the 32-bit handler
is updated to keep exit handling consistent with the 64-bit code, and nothing
has changed.

Currently we save/restore fp/simd on each exit. The first patch optimizes
arm64 save/restore so that we only do so on Guest access. hackbench and
several lmbench tests show that anywhere from 30% to 50% of exits don't
save/restore the fp/simd register set.

Tested on Foundation Model; unfortunately not yet tested on a VHE-enabled model.

Mario Smarduch (2):
  Optimize arm64 non-VHE fpsimd skip 30-50% save/restore on exits
  keep arm vfp/simd exit handling consistent with arm64

 arch/arm/kvm/interrupts.S        | 14 +++++-----
 arch/arm64/include/asm/kvm_arm.h |  5 +++-
 arch/arm64/kvm/hyp.S             | 58 +++++++++++++++++++++++++++++++++++++---
 3 files changed, 66 insertions(+), 11 deletions(-)

-- 
1.9.1

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v4 1/2] arm64: KVM: Optimize arm64 non-VHE fpsimd skip 30-50% save/restore on exits
  2015-07-11  1:19 [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE Mario Smarduch
@ 2015-07-11  1:19 ` Mario Smarduch
  2015-07-11  1:19 ` [PATCH v4 2/2] keep arm vfp/simd exit handling consistent with arm64 Mario Smarduch
  2015-07-16 15:52 ` [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE Christoffer Dall
  2 siblings, 0 replies; 7+ messages in thread
From: Mario Smarduch @ 2015-07-11  1:19 UTC (permalink / raw)
  To: linux-arm-kernel

This patch only saves and restores FP/SIMD registers on Guest access. To do
this cptr_el2 FP/SIMD trap is set on Guest entry and later checked on exit.
The non-VHE path has been tested, future work would add VHE support.

Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
 arch/arm64/include/asm/kvm_arm.h |  5 +++-
 arch/arm64/kvm/hyp.S             | 58 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index c8998c0..0a1d152 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -172,10 +172,13 @@
 #define HSTR_EL2_TTEE	(1 << 16)
 #define HSTR_EL2_T(x)	(1 << x)
 
+/* Hyp Coprocessor Trap Register Shifts */
+#define CPTR_EL2_TFP_SHIFT 10
+
 /* Hyp Coprocessor Trap Register */
 #define CPTR_EL2_TCPAC	(1 << 31)
 #define CPTR_EL2_TTA	(1 << 20)
-#define CPTR_EL2_TFP	(1 << 10)
+#define CPTR_EL2_TFP	(1 << CPTR_EL2_TFP_SHIFT)
 
 /* Hyp Debug Configuration Register bits */
 #define MDCR_EL2_TDRA		(1 << 11)
diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
index 64a5280..9d154ed 100644
--- a/arch/arm64/kvm/hyp.S
+++ b/arch/arm64/kvm/hyp.S
@@ -731,6 +731,15 @@ ifnvhe "mrs	\tmp, hcr_el2",			_S_(ldr	\tmp, [x0, #VCPU_HCR_EL2])
 	tbz	\tmp, #KVM_ARM64_DEBUG_DIRTY_SHIFT, \target
 .endm
 
+/*
+ * For non-VHE - branch to target if CPTR_EL2.TFP bit is set (VFP/SIMD trapping
+ * enabled). For VHE do nothing.
+ */
+.macro skip_fpsimd_state tmp, target
+ifnvhe	"mrs     \tmp, cptr_el2",				nop
+ifnvhe	_S_(tbnz    \tmp, #CPTR_EL2_TFP_SHIFT, \target),	nop
+.endm
+
 .macro compute_debug_state target
 	// Compute debug state: If any of KDE, MDE or KVM_ARM64_DEBUG_DIRTY
 	// is set, we do a full save/restore cycle and disable trapping.
@@ -823,7 +832,7 @@ ifnvhe "mrs	\tmp, hcr_el2",			_S_(ldr	\tmp, [x0, #VCPU_HCR_EL2])
 	adr	x3, __kvm_hyp_vector
 ifnvhe	nop,					"msr	vbar_el1, x3"
 ifnvhe	nop,					"mrs	x2, cpacr_el1"
-ifnvhe _S_(ldr	x2, =(CPTR_EL2_TTA)),		"orr x2, x2, #(1 << 28)"
+ifnvhe _S_(ldr	x2, =(CPTR_EL2_TTA|CPTR_EL2_TFP)),	"orr x2, x2, #(1 << 28)"
 ifnvhe "msr	cptr_el2, x2",			"msr	cpacr_el1, x2"
 
 	mov	x2, #(1 << 15)	// Trap CP15 Cr=15
@@ -851,7 +860,7 @@ ifnvhe 	nop,					_S_(orr	x2, x2, #HCR_E2H)
 ifnvhe	nop,					"mrs	x2, cpacr_el1"
 ifnvhe	nop,					"movn	x3, #(1 << 12), lsl #16"
 ifnvhe	nop,					"and	x2, x2, x3"
-ifnvhe "msr	cptr_el2, xzr",			"msr	cpacr_el1, x2"
+ifnvhe	nop,					"msr    cpacr_el1, x2"
 	msr	hstr_el2, xzr
 
 	mrs	x2, mdcr_el2
@@ -988,6 +997,33 @@ __restore_fpsimd:
 	ret
 
 /*
+ * For non-VHE - on first FP/SIMD access, restore guest, save host registers
+ * and disable future trapping. For VHE this should never get called.
+ */
+switch_to_guest_fpsimd:
+       push    x4, lr
+
+       mrs     x2, cptr_el2
+       bic     x2, x2, #CPTR_EL2_TFP
+       msr     cptr_el2, x2
+
+       mrs     x0, tpidr_el2
+
+       ldr     x2, [x0, #VCPU_HOST_CONTEXT]
+       kern_hyp_va x2
+       bl __save_fpsimd
+
+       add     x2, x0, #VCPU_CONTEXT
+       bl __restore_fpsimd
+
+       pop     x4, lr
+       pop     x2, x3
+       pop     x0, x1
+
+       eret
+
+
+/*
  * u64 __kvm_vcpu_run(struct kvm_vcpu *vcpu);
  *
  * This is the world switch. The first half of the function
@@ -1007,7 +1043,7 @@ ENTRY(__kvm_vcpu_run)
 	kern_hyp_va x2
 
 	save_host_regs
-	bl __save_fpsimd
+ifnvhe	nop,					"bl __save_fpsimd"
 ifnvhe "bl	__save_sysregs",		"nop"
 	bl	__save_shared_sysregs
 
@@ -1025,7 +1061,7 @@ ifnvhe "bl	__save_sysregs",		"nop"
 
 	bl __restore_sysregs
 	bl __restore_shared_sysregs
-	bl __restore_fpsimd
+ifnvhe	"nop",			"bl __restore_fpsimd"
 
 	skip_debug_state x3, 1f
 	bl	__restore_debug
@@ -1044,7 +1080,9 @@ __kvm_vcpu_return:
 	add	x2, x0, #VCPU_CONTEXT
 
 	save_guest_regs
+	skip_fpsimd_state x3, 1f
 	bl __save_fpsimd
+1:
 	bl __save_sysregs
 	bl __save_shared_sysregs
 
@@ -1072,7 +1110,11 @@ __kvm_vcpu_return_irq:
 
 ifnvhe "bl	__restore_sysregs",		"nop"
 	bl	__restore_shared_sysregs
+	skip_fpsimd_state x3, 1f
 	bl __restore_fpsimd
+1:
+	/* For non-VHE - Clear FPSIMD and Trace trapping, do nothing for VHE */
+ifnvhe	"msr     cptr_el2, xzr",		"nop"
 
 	skip_debug_state x3, 1f
 	// Clear the dirty flag for the next run, as all the state has
@@ -1298,6 +1340,14 @@ el1_trap:
 	 * x1: ESR
 	 * x2: ESR_EC
 	 */
+
+	/*
+	 * For non-VHE Guest accessed FP/SIMD registers, save host, restore
+	 * guest. For VHE condition should never be true.
+	 */
+	cmp     x2, #ESR_ELx_EC_FP_ASIMD
+	b.eq    switch_to_guest_fpsimd
+
 	cmp	x2, #ESR_ELx_EC_DABT_LOW
 	mov	x0, #ESR_ELx_EC_IABT_LOW
 	ccmp	x2, x0, #4, ne
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v4 2/2] keep arm vfp/simd exit handling consistent with arm64
  2015-07-11  1:19 [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE Mario Smarduch
  2015-07-11  1:19 ` [PATCH v4 1/2] arm64: KVM: Optimize arm64 non-VHE fpsimd skip 30-50% save/restore on exits Mario Smarduch
@ 2015-07-11  1:19 ` Mario Smarduch
  2015-07-16 15:52 ` [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE Christoffer Dall
  2 siblings, 0 replies; 7+ messages in thread
From: Mario Smarduch @ 2015-07-11  1:19 UTC (permalink / raw)
  To: linux-arm-kernel

After enhancing arm64 FP/SIMD exit handling, ARMv7 VFP exit branch is moved
to guest trap handling. This allows us to keep exit handling flow between both
architectures consistent.

Signed-off-by: Mario Smarduch <m.smarduch@samsung.com>
---
 arch/arm/kvm/interrupts.S | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
index 568494d..900ef6d 100644
--- a/arch/arm/kvm/interrupts.S
+++ b/arch/arm/kvm/interrupts.S
@@ -361,10 +361,6 @@ hyp_hvc:
 	@ Check syndrome register
 	mrc	p15, 4, r1, c5, c2, 0	@ HSR
 	lsr	r0, r1, #HSR_EC_SHIFT
-#ifdef CONFIG_VFPv3
-	cmp	r0, #HSR_EC_CP_0_13
-	beq	switch_to_guest_vfp
-#endif
 	cmp	r0, #HSR_EC_HVC
 	bne	guest_trap		@ Not HVC instr.
 
@@ -378,7 +374,10 @@ hyp_hvc:
 	cmp     r2, #0
 	bne	guest_trap		@ Guest called HVC
 
-host_switch_to_hyp:
+	/*
+	 * Getting here means host called HVC, we shift parameters and branch
+	 * to Hyp function.
+	 */
 	pop	{r0, r1, r2}
 
 	/* Check for __hyp_get_vectors */
@@ -409,6 +408,10 @@ guest_trap:
 
 	@ Check if we need the fault information
 	lsr	r1, r1, #HSR_EC_SHIFT
+#ifdef CONFIG_VFPv3
+	cmp	r1, #HSR_EC_CP_0_13
+	beq	switch_to_guest_vfp
+#endif
 	cmp	r1, #HSR_EC_IABT
 	mrceq	p15, 4, r2, c6, c0, 2	@ HIFAR
 	beq	2f
@@ -477,7 +480,6 @@ guest_trap:
  */
 #ifdef CONFIG_VFPv3
 switch_to_guest_vfp:
-	load_vcpu			@ Load VCPU pointer to r0
 	push	{r3-r7}
 
 	@ NEON/VFP used.  Turn on VFP access.
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE
  2015-07-11  1:19 [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE Mario Smarduch
  2015-07-11  1:19 ` [PATCH v4 1/2] arm64: KVM: Optimize arm64 non-VHE fpsimd skip 30-50% save/restore on exits Mario Smarduch
  2015-07-11  1:19 ` [PATCH v4 2/2] keep arm vfp/simd exit handling consistent with arm64 Mario Smarduch
@ 2015-07-16 15:52 ` Christoffer Dall
  2015-07-16 18:23   ` Mario Smarduch
  2 siblings, 1 reply; 7+ messages in thread
From: Christoffer Dall @ 2015-07-16 15:52 UTC (permalink / raw)
  To: linux-arm-kernel

On Fri, Jul 10, 2015 at 06:19:05PM -0700, Mario Smarduch wrote:
> This is a followp to previous iteration but implemented on top of VHE patches. 
> Only non-VHE path is addressied by this patch. In second patch 32-bit handler 
> is updated to keep exit handling consistent with 64-bit code, and nothing
> has changed.
> 
Why not simply preserve this the way it was in v3 and have it merged
first - after all we have reviewed it and I thought it was more or less
ready to be merged - I suspect the VHE patches may have a way to go
still ?

-Christoffer

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE
  2015-07-16 15:52 ` [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE Christoffer Dall
@ 2015-07-16 18:23   ` Mario Smarduch
  2015-07-16 19:05     ` Christoffer Dall
  0 siblings, 1 reply; 7+ messages in thread
From: Mario Smarduch @ 2015-07-16 18:23 UTC (permalink / raw)
  To: linux-arm-kernel

On 07/16/2015 08:52 AM, Christoffer Dall wrote:
> On Fri, Jul 10, 2015 at 06:19:05PM -0700, Mario Smarduch wrote:
>> This is a followp to previous iteration but implemented on top of VHE patches. 
>> Only non-VHE path is addressied by this patch. In second patch 32-bit handler 
>> is updated to keep exit handling consistent with 64-bit code, and nothing
>> has changed.
>>
> Why not simply preserve this the way it was in v3 and have it merged
> first - after all we have reviewed it and I thought it was more or less
> ready to be merged - I suspect the VHE patches may have a way to go
> still ?

Definitely, that's a better path. After looking at VHE patches,
I would probably leave V3 the way it is (keeping deactivate_xxxx:
symmetric). Marc has Reviewed V3 and you commented either way was
fine with you, so V3 should be ok.

Jumping on VHE is a little too much at this time. Thanks for the
alternative; I kind of got myself into a jam here.

- Mario

> 
> -Christoffer
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE
  2015-07-16 18:23   ` Mario Smarduch
@ 2015-07-16 19:05     ` Christoffer Dall
  2015-07-16 19:22       ` Mario Smarduch
  0 siblings, 1 reply; 7+ messages in thread
From: Christoffer Dall @ 2015-07-16 19:05 UTC (permalink / raw)
  To: linux-arm-kernel

On Thu, Jul 16, 2015 at 11:23:08AM -0700, Mario Smarduch wrote:
> On 07/16/2015 08:52 AM, Christoffer Dall wrote:
> > On Fri, Jul 10, 2015 at 06:19:05PM -0700, Mario Smarduch wrote:
> >> This is a followp to previous iteration but implemented on top of VHE patches. 
> >> Only non-VHE path is addressied by this patch. In second patch 32-bit handler 
> >> is updated to keep exit handling consistent with 64-bit code, and nothing
> >> has changed.
> >>
> > Why not simply preserve this the way it was in v3 and have it merged
> > first - after all we have reviewed it and I thought it was more or less
> > ready to be merged - I suspect the VHE patches may have a way to go
> > still ?
> 
> Definitely, that's a better path. After looking at VHE patches,
> I would probably leave V3 the way it is (keeping deactivate_xxxx:
> symmetric). Marc has Reviewed V3 and you commented either way was
> fine with you, so V3 should be ok.

Yes, but there was a comment in the assembly file to fix up IIRC.

Can you do a quick respin with that commentary changed and then Marc
can queue that if he agrees?

Thanks,
-Christoffer

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE
  2015-07-16 19:05     ` Christoffer Dall
@ 2015-07-16 19:22       ` Mario Smarduch
  0 siblings, 0 replies; 7+ messages in thread
From: Mario Smarduch @ 2015-07-16 19:22 UTC (permalink / raw)
  To: linux-arm-kernel

On 07/16/2015 12:05 PM, Christoffer Dall wrote:
> On Thu, Jul 16, 2015 at 11:23:08AM -0700, Mario Smarduch wrote:
>> On 07/16/2015 08:52 AM, Christoffer Dall wrote:
>>> On Fri, Jul 10, 2015 at 06:19:05PM -0700, Mario Smarduch wrote:
>>>> This is a followp to previous iteration but implemented on top of VHE patches. 
>>>> Only non-VHE path is addressied by this patch. In second patch 32-bit handler 
>>>> is updated to keep exit handling consistent with 64-bit code, and nothing
>>>> has changed.
>>>>
>>> Why not simply preserve this the way it was in v3 and have it merged
>>> first - after all we have reviewed it and I thought it was more or less
>>> ready to be merged - I suspect the VHE patches may have a way to go
>>> still ?
>>
>> Definitely, that's a better path. After looking at VHE patches,
>> I would probably leave V3 the way it is (keeping deactivate_xxxx:
>> symmetric). Marc has Reviewed V3 and you commented either way was
>> fine with you, so V3 should be ok.
> 
> Yes, but there was a comment in the assembly file to fix up IIRC.

That's right, the 1/2 and 0/2 headers need a little bit of editing.

> 
> Can you do a quick respin with that commentary changed and then Marc
> can queue that if he agrees?
> 
> Thanks,
> -Christoffer
> 

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2015-07-16 19:22 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-11  1:19 [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE Mario Smarduch
2015-07-11  1:19 ` [PATCH v4 1/2] arm64: KVM: Optimize arm64 non-VHE fpsimd skip 30-50% save/restore on exits Mario Smarduch
2015-07-11  1:19 ` [PATCH v4 2/2] keep arm vfp/simd exit handling consistent with arm64 Mario Smarduch
2015-07-16 15:52 ` [PATCH v4 0/2] arm/arm64: KVM: Optimize arm64 fp/simd, saves 30-50% on exits for non-VHE Christoffer Dall
2015-07-16 18:23   ` Mario Smarduch
2015-07-16 19:05     ` Christoffer Dall
2015-07-16 19:22       ` Mario Smarduch

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).