* [PATCH v2 1/7] KVM: arm64: Reintroduce __sve_save_state
2024-05-21 16:37 [PATCH v2 0/7] KVM: arm64: Fix handling of host fpsimd/sve state in protected mode Fuad Tabba
@ 2024-05-21 16:37 ` Fuad Tabba
2024-05-21 16:37 ` [PATCH v2 2/7] KVM: arm64: Abstract set/clear of CPTR_EL2 bits behind helper Fuad Tabba
` (5 subsequent siblings)
6 siblings, 0 replies; 21+ messages in thread
From: Fuad Tabba @ 2024-05-21 16:37 UTC (permalink / raw)
To: kvmarm, linux-arm-kernel
Cc: maz, will, qperret, tabba, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
Now that the hypervisor is handling the host sve state in
protected mode, it needs to be able to save it.
This reverts commit e66425fc9ba3 ("KVM: arm64: Remove unused
__sve_save_state").
Signed-off-by: Fuad Tabba <tabba@google.com>
---
arch/arm64/include/asm/kvm_hyp.h | 1 +
arch/arm64/kvm/hyp/fpsimd.S | 6 ++++++
2 files changed, 7 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 3e80464f8953..2ab23589339a 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -111,6 +111,7 @@ void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
+void __sve_save_state(void *sve_pffr, u32 *fpsr);
void __sve_restore_state(void *sve_pffr, u32 *fpsr);
u64 __guest_enter(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/kvm/hyp/fpsimd.S b/arch/arm64/kvm/hyp/fpsimd.S
index 61e6f3ba7b7d..e950875e31ce 100644
--- a/arch/arm64/kvm/hyp/fpsimd.S
+++ b/arch/arm64/kvm/hyp/fpsimd.S
@@ -25,3 +25,9 @@ SYM_FUNC_START(__sve_restore_state)
sve_load 0, x1, x2, 3
ret
SYM_FUNC_END(__sve_restore_state)
+
+SYM_FUNC_START(__sve_save_state)
+ mov x2, #1
+ sve_save 0, x1, x2, 3
+ ret
+SYM_FUNC_END(__sve_save_state)
--
2.45.0.215.g3402c0e53f-goog
* [PATCH v2 2/7] KVM: arm64: Abstract set/clear of CPTR_EL2 bits behind helper
2024-05-21 16:37 [PATCH v2 0/7] KVM: arm64: Fix handling of host fpsimd/sve state in protected mode Fuad Tabba
2024-05-21 16:37 ` [PATCH v2 1/7] KVM: arm64: Reintroduce __sve_save_state Fuad Tabba
@ 2024-05-21 16:37 ` Fuad Tabba
2024-05-21 21:08 ` Marc Zyngier
2024-05-21 16:37 ` [PATCH v2 3/7] KVM: arm64: Specialize handling of host fpsimd state on trap Fuad Tabba
` (4 subsequent siblings)
6 siblings, 1 reply; 21+ messages in thread
From: Fuad Tabba @ 2024-05-21 16:37 UTC (permalink / raw)
To: kvmarm, linux-arm-kernel
Cc: maz, will, qperret, tabba, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
The same traps controlled by CPTR_EL2 or CPACR_EL1 need to be
toggled in different parts of the code, but the exact bits and
their polarity differ between these two formats and the mode
(vhe/nvhe/hvhe).
To reduce the amount of duplicated code and the chance of getting
the wrong bit/polarity or missing a field, abstract the set/clear
of CPTR_EL2 bits behind a helper.
Since (h)VHE is the way of the future, use the CPACR_EL1 format,
which is a subset of the VHE CPTR_EL2, as a reference.
No functional change intended.
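With the helper, callers only ever reason in the CPACR_EL1 layout; for
example, the fpsimd trap handler below boils down to (illustrative,
mirroring the switch.h hunk further down):
	reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
	if (sve_guest)
		reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
	cpacr_clear_set(0, reg);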
Suggested-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Fuad Tabba <tabba@google.com>
---
arch/arm64/include/asm/kvm_emulate.h | 34 +++++++++++++++++++++++++
arch/arm64/kvm/hyp/include/hyp/switch.h | 17 +++----------
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 6 +----
3 files changed, 39 insertions(+), 18 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 501e3e019c93..74837d1762e5 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -557,6 +557,40 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
vcpu_set_flag((v), e); \
} while (0)
+
+static inline void __cptr_clear_set_nvhe(u64 cpacr_clr, u64 cpacr_set)
+{
+ u64 clr = 0, set = 0;
+
+ if (cpacr_clr & CPACR_ELx_FPEN)
+ set |= CPTR_EL2_TFP;
+ if (cpacr_clr & CPACR_ELx_ZEN)
+ set |= CPTR_EL2_TZ;
+ if (cpacr_clr & CPACR_ELx_SMEN)
+ set |= CPTR_EL2_TSM;
+ if (cpacr_clr & CPACR_ELx_TTA)
+ clr |= CPTR_EL2_TTA;
+
+ if (cpacr_set & CPACR_ELx_FPEN)
+ clr |= CPTR_EL2_TFP;
+ if (cpacr_set & CPACR_ELx_ZEN)
+ clr |= CPTR_EL2_TZ;
+ if (cpacr_set & CPACR_ELx_SMEN)
+ clr |= CPTR_EL2_TSM;
+ if (cpacr_set & CPACR_ELx_TTA)
+ set |= CPTR_EL2_TTA;
+
+ sysreg_clear_set(cptr_el2, clr, set);
+}
+
+static inline void cpacr_clear_set(u64 clr, u64 set)
+{
+ if (has_vhe() || has_hvhe())
+ sysreg_clear_set(cpacr_el1, clr, set);
+ else
+ __cptr_clear_set_nvhe(clr, set);
+}
+
static __always_inline void kvm_write_cptr_el2(u64 val)
{
if (has_vhe() || has_hvhe())
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index a92566f36022..2ebe2fea8768 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -353,19 +353,10 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* Valid trap. Switch the context: */
/* First disable enough traps to allow us to update the registers */
- if (has_vhe() || has_hvhe()) {
- reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
- if (sve_guest)
- reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
-
- sysreg_clear_set(cpacr_el1, 0, reg);
- } else {
- reg = CPTR_EL2_TFP;
- if (sve_guest)
- reg |= CPTR_EL2_TZ;
-
- sysreg_clear_set(cptr_el2, reg, 0);
- }
+ reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
+ if (sve_guest)
+ reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
+ cpacr_clear_set(0, reg);
isb();
/* Write out the host state if it's in the registers */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index d5c48dc98f67..b07d44484f42 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -405,11 +405,7 @@ void handle_trap(struct kvm_cpu_context *host_ctxt)
handle_host_smc(host_ctxt);
break;
case ESR_ELx_EC_SVE:
- if (has_hvhe())
- sysreg_clear_set(cpacr_el1, 0, (CPACR_EL1_ZEN_EL1EN |
- CPACR_EL1_ZEN_EL0EN));
- else
- sysreg_clear_set(cptr_el2, CPTR_EL2_TZ, 0);
+ cpacr_clear_set(0, (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN));
isb();
sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
break;
--
2.45.0.215.g3402c0e53f-goog
* Re: [PATCH v2 2/7] KVM: arm64: Abstract set/clear of CPTR_EL2 bits behind helper
2024-05-21 16:37 ` [PATCH v2 2/7] KVM: arm64: Abstract set/clear of CPTR_EL2 bits behind helper Fuad Tabba
@ 2024-05-21 21:08 ` Marc Zyngier
2024-05-22 13:48 ` Fuad Tabba
0 siblings, 1 reply; 21+ messages in thread
From: Marc Zyngier @ 2024-05-21 21:08 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
On Tue, 21 May 2024 17:37:15 +0100,
Fuad Tabba <tabba@google.com> wrote:
>
> The same traps controlled by CPTR_EL2 or CPACR_EL1 need to be
> toggled in different parts of the code, but the exact bits and
> their polarity differ between these two formats and the mode
> (vhe/nvhe/hvhe).
>
> To reduce the amount of duplicated code and the chance of getting
> the wrong bit/polarity or missing a field, abstract the set/clear
> of CPTR_EL2 bits behind a helper.
>
> Since (h)VHE is the way of the future, use the CPACR_EL1 format,
> which is a subset of the VHE CPTR_EL2, as a reference.
>
> No functional change intended.
>
> Suggested-by: Oliver Upton <oliver.upton@linux.dev>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
> arch/arm64/include/asm/kvm_emulate.h | 34 +++++++++++++++++++++++++
> arch/arm64/kvm/hyp/include/hyp/switch.h | 17 +++----------
> arch/arm64/kvm/hyp/nvhe/hyp-main.c | 6 +----
> 3 files changed, 39 insertions(+), 18 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> index 501e3e019c93..74837d1762e5 100644
> --- a/arch/arm64/include/asm/kvm_emulate.h
> +++ b/arch/arm64/include/asm/kvm_emulate.h
> @@ -557,6 +557,40 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
> vcpu_set_flag((v), e); \
> } while (0)
>
> +
> +static inline void __cptr_clear_set_nvhe(u64 cpacr_clr, u64 cpacr_set)
> +{
> + u64 clr = 0, set = 0;
> +
> + if (cpacr_clr & CPACR_ELx_FPEN)
> + set |= CPTR_EL2_TFP;
> + if (cpacr_clr & CPACR_ELx_ZEN)
> + set |= CPTR_EL2_TZ;
> + if (cpacr_clr & CPACR_ELx_SMEN)
These 3 fields are actually pairs of bits. Can we have a compile-time
check that both bits are set?
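Something like this (completely untested sketch, and it assumes the
callers only ever pass compile-time constants) would do the trick:
	#define assert_whole_field(mask, field)				\
		BUILD_BUG_ON_MSG(((mask) & (field)) &&			\
				 ((mask) & (field)) != (field),		\
				 "partial " #field)
	assert_whole_field((cpacr_clr) | (cpacr_set), CPACR_ELx_FPEN);
	assert_whole_field((cpacr_clr) | (cpacr_set), CPACR_ELx_ZEN);
	assert_whole_field((cpacr_clr) | (cpacr_set), CPACR_ELx_SMEN);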
> + set |= CPTR_EL2_TSM;
> + if (cpacr_clr & CPACR_ELx_TTA)
> + clr |= CPTR_EL2_TTA;
How about TCPAC, TAM, and E0POE?
> +
> + if (cpacr_set & CPACR_ELx_FPEN)
> + clr |= CPTR_EL2_TFP;
> + if (cpacr_set & CPACR_ELx_ZEN)
> + clr |= CPTR_EL2_TZ;
> + if (cpacr_set & CPACR_ELx_SMEN)
> + clr |= CPTR_EL2_TSM;
> + if (cpacr_set & CPACR_ELx_TTA)
> + set |= CPTR_EL2_TTA;
The duplication is pretty unfortunate. Having a single helper that
translates a register layout into another would be better.
> +
> + sysreg_clear_set(cptr_el2, clr, set);
And omit this...
> +}
> +
> +static inline void cpacr_clear_set(u64 clr, u64 set)
> +{
> + if (has_vhe() || has_hvhe())
> + sysreg_clear_set(cpacr_el1, clr, set);
> + else
> + __cptr_clear_set_nvhe(clr, set);
So that this could read as:
sysreg_clear_set(cptr_el2, cpacr_to_cptr(clr), cpacr_to_cptr(set));
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
* Re: [PATCH v2 2/7] KVM: arm64: Abstract set/clear of CPTR_EL2 bits behind helper
2024-05-21 21:08 ` Marc Zyngier
@ 2024-05-22 13:48 ` Fuad Tabba
2024-05-28 7:58 ` Marc Zyngier
0 siblings, 1 reply; 21+ messages in thread
From: Fuad Tabba @ 2024-05-22 13:48 UTC (permalink / raw)
To: Marc Zyngier
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
Hi Marc,
On Tue, May 21, 2024 at 10:08 PM Marc Zyngier <maz@kernel.org> wrote:
>
> On Tue, 21 May 2024 17:37:15 +0100,
> Fuad Tabba <tabba@google.com> wrote:
> >
> > The same traps controlled by CPTR_EL2 or CPACR_EL1 need to be
> > toggled in different parts of the code, but the exact bits and
> > their polarity differ between these two formats and the mode
> > (vhe/nvhe/hvhe).
> >
> > To reduce the amount of duplicated code and the chance of getting
> > the wrong bit/polarity or missing a field, abstract the set/clear
> > of CPTR_EL2 bits behind a helper.
> >
> > Since (h)VHE is the way of the future, use the CPACR_EL1 format,
> > which is a subset of the VHE CPTR_EL2, as a reference.
> >
> > No functional change intended.
> >
> > Suggested-by: Oliver Upton <oliver.upton@linux.dev>
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > arch/arm64/include/asm/kvm_emulate.h | 34 +++++++++++++++++++++++++
> > arch/arm64/kvm/hyp/include/hyp/switch.h | 17 +++----------
> > arch/arm64/kvm/hyp/nvhe/hyp-main.c | 6 +----
> > 3 files changed, 39 insertions(+), 18 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> > index 501e3e019c93..74837d1762e5 100644
> > --- a/arch/arm64/include/asm/kvm_emulate.h
> > +++ b/arch/arm64/include/asm/kvm_emulate.h
> > @@ -557,6 +557,40 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
> > vcpu_set_flag((v), e); \
> > } while (0)
> >
> > +
> > +static inline void __cptr_clear_set_nvhe(u64 cpacr_clr, u64 cpacr_set)
> > +{
> > + u64 clr = 0, set = 0;
> > +
> > + if (cpacr_clr & CPACR_ELx_FPEN)
> > + set |= CPTR_EL2_TFP;
> > + if (cpacr_clr & CPACR_ELx_ZEN)
> > + set |= CPTR_EL2_TZ;
> > + if (cpacr_clr & CPACR_ELx_SMEN)
>
> These 3 fields are actually pairs of bits. Can we have a compile-time
> check that both bits are set?
Yes.
> > + set |= CPTR_EL2_TSM;
> > + if (cpacr_clr & CPACR_ELx_TTA)
> > + clr |= CPTR_EL2_TTA;
>
> How about TCPAC, TAM, and E0POE?
CPTR_EL2 (nvhe) doesn't have an equivalent to E0POE. Should I have a
compile time check that it's not cleared at all (since it has negative
polarity), or a runtime check for that only for nvhe?
As for TCPAC, TAM, they're not defined under the CPACR bits, but they
are in CPTR_EL2, which is what CPACR refers to in VHE. I will handle
them, but should I rename cpacr_to_cptr() to cptr_vhe_to_nvhe()?
>
> > +
> > + if (cpacr_set & CPACR_ELx_FPEN)
> > + clr |= CPTR_EL2_TFP;
> > + if (cpacr_set & CPACR_ELx_ZEN)
> > + clr |= CPTR_EL2_TZ;
> > + if (cpacr_set & CPACR_ELx_SMEN)
> > + clr |= CPTR_EL2_TSM;
> > + if (cpacr_set & CPACR_ELx_TTA)
> > + set |= CPTR_EL2_TTA;
>
> The duplication is pretty unfortunate. Having a single helper that
> translates a register layout into another would be better.
> > +
> > + sysreg_clear_set(cptr_el2, clr, set);
>
> And omit this...
>
> > +}
> > +
> > +static inline void cpacr_clear_set(u64 clr, u64 set)
> > +{
> > + if (has_vhe() || has_hvhe())
> > + sysreg_clear_set(cpacr_el1, clr, set);
> > + else
> > + __cptr_clear_set_nvhe(clr, set);
>
> So that this could read as:
>
> sysreg_clear_set(cptr_el2, cpacr_to_cptr(clr), cpacr_to_cptr(set));
I don't know how this could work with only one function/macro, while
covering all the fields. The problem is that the polarity of these
bits varies: some have the same polarity between CPACR/CPTR, and others
have the opposite polarity. Therefore, the conversion to `clr` is
different from the conversion to `set`.
Say we need to clear both CPACR_ELx_FPEN and CPACR_ELx_TTA. The
problem is that CPACR_ELx_FPEN has the opposite polarity as
CPTR_EL2_TFP, whereas CPACR_ELx_TTA has the same polarity as
CPTR_EL2_TTA (different position though).
What would cpacr_to_cptr() return that would work in the code you propose:
sysreg_clear_set(cptr_el2,
cpacr_to_cptr(CPACR_ELx_FPEN|CPACR_ELx_TTA), cpacr_to_cptr(0));
Desired result:
sysreg_clear_set(cptr_el2, CPTR_EL2_TTA, CPTR_EL2_TFP);
I can make it work with two functions/macros:
__cpacr_to_cptr_clr(clr, set) and __cpacr_to_cptr_set(clr, set) -->
sysreg_clear_set(cptr_el2, __cpacr_to_cptr_clr(clr, set),
__cpacr_to_cptr_set(clr, set));
Unfortunately this doesn't get rid of the duplication, but it might be
tidier (I converted them to macros to add compile-time checks later):
+#define __cpacr_to_cptr_clr(clr, set) \
+ ({ \
+ u64 cptr = 0; \
+ \
+ if ((set) & CPACR_ELx_FPEN) \
+ cptr |= CPTR_EL2_TFP; \
+ if ((set) & CPACR_ELx_ZEN) \
+ cptr |= CPTR_EL2_TZ; \
+ if ((set) & CPACR_ELx_SMEN) \
+ cptr |= CPTR_EL2_TSM; \
+ if ((clr) & CPACR_ELx_TTA) \
+ cptr |= CPTR_EL2_TTA; \
+ if ((clr) & CPTR_EL2_TAM) \
+ cptr |= CPTR_EL2_TAM; \
+ if ((clr) & CPTR_EL2_TCPAC) \
+ cptr |= CPTR_EL2_TCPAC; \
+ \
+ cptr; \
+ })
+
+#define __cpacr_to_cptr_set(clr, set) \
+ ({ \
+ u64 cptr = 0; \
+ \
+ if ((clr) & CPACR_ELx_FPEN) \
+ cptr |= CPTR_EL2_TFP; \
+ if ((clr) & CPACR_ELx_ZEN) \
+ cptr |= CPTR_EL2_TZ; \
+ if ((clr) & CPACR_ELx_SMEN) \
+ cptr |= CPTR_EL2_TSM; \
+ if ((set) & CPACR_ELx_TTA) \
+ cptr |= CPTR_EL2_TTA; \
+ if ((set) & CPTR_EL2_TAM) \
+ cptr |= CPTR_EL2_TAM; \
+ if ((set) & CPTR_EL2_TCPAC) \
+ cptr |= CPTR_EL2_TCPAC; \
+ \
+ cptr; \
+ })
+
+#define cpacr_clear_set(clr, set) \
+ do { \
+ if (has_vhe() || has_hvhe()) \
+ sysreg_clear_set(cpacr_el1, clr, set); \
+ else \
+ sysreg_clear_set(cptr_el2, \
+ __cpacr_to_cptr_clr(clr, set), \
+ __cpacr_to_cptr_set(clr, set));\
+ } while (0)
What do you think?
Cheers,
/fuad
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
* Re: [PATCH v2 2/7] KVM: arm64: Abstract set/clear of CPTR_EL2 bits behind helper
2024-05-22 13:48 ` Fuad Tabba
@ 2024-05-28 7:58 ` Marc Zyngier
0 siblings, 0 replies; 21+ messages in thread
From: Marc Zyngier @ 2024-05-28 7:58 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
On Wed, 22 May 2024 14:48:19 +0100,
Fuad Tabba <tabba@google.com> wrote:
>
> Hi Marc,
>
> On Tue, May 21, 2024 at 10:08 PM Marc Zyngier <maz@kernel.org> wrote:
> >
> > On Tue, 21 May 2024 17:37:15 +0100,
> > Fuad Tabba <tabba@google.com> wrote:
> > >
> > > The same traps controlled by CPTR_EL2 or CPACR_EL1 need to be
> > > toggled in different parts of the code, but the exact bits and
> > > their polarity differ between these two formats and the mode
> > > (vhe/nvhe/hvhe).
> > >
> > > To reduce the amount of duplicated code and the chance of getting
> > > the wrong bit/polarity or missing a field, abstract the set/clear
> > > of CPTR_EL2 bits behind a helper.
> > >
> > > Since (h)VHE is the way of the future, use the CPACR_EL1 format,
> > > which is a subset of the VHE CPTR_EL2, as a reference.
> > >
> > > No functional change intended.
> > >
> > > Suggested-by: Oliver Upton <oliver.upton@linux.dev>
> > > Signed-off-by: Fuad Tabba <tabba@google.com>
> > > ---
> > > arch/arm64/include/asm/kvm_emulate.h | 34 +++++++++++++++++++++++++
> > > arch/arm64/kvm/hyp/include/hyp/switch.h | 17 +++----------
> > > arch/arm64/kvm/hyp/nvhe/hyp-main.c | 6 +----
> > > 3 files changed, 39 insertions(+), 18 deletions(-)
> > >
> > > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
> > > index 501e3e019c93..74837d1762e5 100644
> > > --- a/arch/arm64/include/asm/kvm_emulate.h
> > > +++ b/arch/arm64/include/asm/kvm_emulate.h
> > > @@ -557,6 +557,40 @@ static __always_inline void kvm_incr_pc(struct kvm_vcpu *vcpu)
> > > vcpu_set_flag((v), e); \
> > > } while (0)
> > >
> > > +
> > > +static inline void __cptr_clear_set_nvhe(u64 cpacr_clr, u64 cpacr_set)
> > > +{
> > > + u64 clr = 0, set = 0;
> > > +
> > > + if (cpacr_clr & CPACR_ELx_FPEN)
> > > + set |= CPTR_EL2_TFP;
> > > + if (cpacr_clr & CPACR_ELx_ZEN)
> > > + set |= CPTR_EL2_TZ;
> > > + if (cpacr_clr & CPACR_ELx_SMEN)
> >
> > These 3 fields are actually pairs of bits. Can we have a compile-time
> > check that both bits are set?
>
> Yes.
>
> > > + set |= CPTR_EL2_TSM;
> > > + if (cpacr_clr & CPACR_ELx_TTA)
> > > + clr |= CPTR_EL2_TTA;
> >
> > How about TCPAC, TAM, and E0POE?
>
> CPTR_EL2 (nvhe) doesn't have an equivalent to E0POE. Should I have a
> compile time check that it's not cleared at all (since it has negative
> polarity), or a runtime check for that only for nvhe?
You're right, and I ended up figuring that out while reworking the NV
side of CPTR_EL2 handling.
>
> As for TCPAC, TAM, they're not defined under the CPACR bits, but they
> are in CPTR_EL2, which is what CPACR refers to in VHE. I will handle
> them, but should I rename cpacr_to_cptr() to cptr_vhe_to_nvhe()?
I think this is clear enough in context, but I don't mind either way.
>
> >
> > > +
> > > + if (cpacr_set & CPACR_ELx_FPEN)
> > > + clr |= CPTR_EL2_TFP;
> > > + if (cpacr_set & CPACR_ELx_ZEN)
> > > + clr |= CPTR_EL2_TZ;
> > > + if (cpacr_set & CPACR_ELx_SMEN)
> > > + clr |= CPTR_EL2_TSM;
> > > + if (cpacr_set & CPACR_ELx_TTA)
> > > + set |= CPTR_EL2_TTA;
> >
> > The duplication is pretty unfortunate. Having a single helper that
> > translates a register layout into another would be better.
> > > +
> > > + sysreg_clear_set(cptr_el2, clr, set);
> >
> > And omit this...
> >
> > > +}
> > > +
> > > +static inline void cpacr_clear_set(u64 clr, u64 set)
> > > +{
> > > + if (has_vhe() || has_hvhe())
> > > + sysreg_clear_set(cpacr_el1, clr, set);
> > > + else
> > > + __cptr_clear_set_nvhe(clr, set);
> >
> > So that this could read as:
> >
> > sysreg_clear_set(cptr_el2, cpacr_to_cptr(clr), cpacr_to_cptr(set));
>
> I don't know how this could work with only one function/macro, while
> covering all the fields. The problem is that the polarity of these
> bits varies: some have the same polarity between CPACR/CPTR, and others
> have the opposite polarity. Therefore, the conversion to `clr` is
> different from the conversion to `set`.
Ah, you're absolutely right. I totally glanced over the fact that the
polarities are inverted, totally ruining the fun.
>
> Say we need to clear both CPACR_ELx_FPEN and CPACR_ELx_TTA. The
> problem is that CPACR_ELx_FPEN has the opposite polarity as
> CPTR_EL2_TFP, whereas CPACR_ELx_TTA has the same polarity as
> CPTR_EL2_TTA (different position though).
>
> What would cpacr_to_cptr() return that would work in the code you propose:
> sysreg_clear_set(cptr_el2,
> cpacr_to_cptr(CPACR_ELx_FPEN|CPACR_ELx_TTA), cpacr_to_cptr(0));
>
> Desired result:
> sysreg_clear_set(cptr_el2, CPTR_EL2_TTA, CPTR_EL2_TFP);
>
> I can make it work with two functions/macros:
> __cpacr_to_cptr_clear(clr, set) and __cpacr_to_cptr_set(clr, set) -->
> sysreg_clear_set(cptr_el2, __cpacr_to_cptr_clear(clr, set),
> __cpacr_to_cptr_set(clr, set));
>
> Unfortunately this doesn't get rid of the duplication, but it might be
> tidier (I converted them to macros to add compile-time checks later):
>
> +#define __cpacr_to_cptr_clr(clr, set) \
> + ({ \
> + u64 cptr = 0; \
> + \
> + if ((set) & CPACR_ELx_FPEN) \
> + cptr |= CPTR_EL2_TFP; \
> + if ((set) & CPACR_ELx_ZEN) \
> + cptr |= CPTR_EL2_TZ; \
> + if ((set) & CPACR_ELx_SMEN) \
> + cptr |= CPTR_EL2_TSM; \
> + if ((clr) & CPACR_ELx_TTA) \
> + cptr |= CPTR_EL2_TTA; \
> + if ((clr) & CPTR_EL2_TAM) \
> + cptr |= CPTR_EL2_TAM; \
> + if ((clr) & CPTR_EL2_TCPAC) \
> + cptr |= CPTR_EL2_TCPAC; \
> + \
> + cptr; \
> + })
> +
> +#define __cpacr_to_cptr_set(clr, set) \
> + ({ \
> + u64 cptr = 0; \
> + \
> + if ((clr) & CPACR_ELx_FPEN) \
> + cptr |= CPTR_EL2_TFP; \
> + if ((clr) & CPACR_ELx_ZEN) \
> + cptr |= CPTR_EL2_TZ; \
> + if ((clr) & CPACR_ELx_SMEN) \
> + cptr |= CPTR_EL2_TSM; \
> + if ((set) & CPACR_ELx_TTA) \
> + cptr |= CPTR_EL2_TTA; \
> + if ((set) & CPTR_EL2_TAM) \
> + cptr |= CPTR_EL2_TAM; \
> + if ((set) & CPTR_EL2_TCPAC) \
> + cptr |= CPTR_EL2_TCPAC; \
> + \
> + cptr; \
> + })
> +
> +#define cpacr_clear_set(clr, set) \
> + do { \
> + if (has_vhe() || has_hvhe()) \
> + sysreg_clear_set(cpacr_el1, clr, set); \
> + else \
> + sysreg_clear_set(cptr_el2, \
> + __cpacr_to_cptr_clr(clr, set), \
> + __cpacr_to_cptr_set(clr, set));\
> + } while (0)
>
> What do you think?
I quite like this. It is a large body of code, but it is easy to read
and symmetric.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
* [PATCH v2 3/7] KVM: arm64: Specialize handling of host fpsimd state on trap
2024-05-21 16:37 [PATCH v2 0/7] KVM: arm64: Fix handling of host fpsimd/sve state in protected mode Fuad Tabba
2024-05-21 16:37 ` [PATCH v2 1/7] KVM: arm64: Reintroduce __sve_save_state Fuad Tabba
2024-05-21 16:37 ` [PATCH v2 2/7] KVM: arm64: Abstract set/clear of CPTR_EL2 bits behind helper Fuad Tabba
@ 2024-05-21 16:37 ` Fuad Tabba
2024-05-21 16:37 ` [PATCH v2 4/7] KVM: arm64: Store the maximum sve vector length at hyp Fuad Tabba
` (3 subsequent siblings)
6 siblings, 0 replies; 21+ messages in thread
From: Fuad Tabba @ 2024-05-21 16:37 UTC (permalink / raw)
To: kvmarm, linux-arm-kernel
Cc: maz, will, qperret, tabba, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
In subsequent patches, the nVHE and VHE code will diverge in how they save the host
fpsimd/sve state when taking a guest fpsimd/sve trap. Add a
specialized helper to handle it.
No functional change intended.
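For a rough idea of where this is headed, the nVHE copy is expected to
grow along these lines in a later patch (sketch only; the
__hyp_sve_save_host() helper doesn't exist yet at this point):
	static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
	{
		/*
		 * Protected mode will also save the host's sve state here,
		 * rather than trusting the host to do it.
		 */
		if (unlikely(is_protected_kvm_enabled() && system_supports_sve()))
			__hyp_sve_save_host();	/* hypothetical, added later */
		else
			__fpsimd_save_state(*host_data_ptr(fpsimd_state));
	}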
Signed-off-by: Fuad Tabba <tabba@google.com>
---
arch/arm64/kvm/hyp/include/hyp/switch.h | 4 +++-
arch/arm64/kvm/hyp/nvhe/switch.c | 5 +++++
arch/arm64/kvm/hyp/vhe/switch.c | 5 +++++
3 files changed, 13 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 2ebe2fea8768..1897b73e635c 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -320,6 +320,8 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
+
/*
* We trap the first access to the FP/SIMD to save the host context and
* restore the guest context lazily.
@@ -361,7 +363,7 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* Write out the host state if it's in the registers */
if (host_owns_fp_regs())
- __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+ kvm_hyp_save_fpsimd_host(vcpu);
/* Restore the guest state */
if (sve_guest)
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 6758cd905570..019f863922fa 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -182,6 +182,11 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
kvm_handle_pvm_sysreg(vcpu, exit_code));
}
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c
index d7af5f46f22a..20073579e9f5 100644
--- a/arch/arm64/kvm/hyp/vhe/switch.c
+++ b/arch/arm64/kvm/hyp/vhe/switch.c
@@ -262,6 +262,11 @@ static bool kvm_hyp_handle_eret(struct kvm_vcpu *vcpu, u64 *exit_code)
return true;
}
+static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
+{
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+}
+
static const exit_handler_fn hyp_exit_handlers[] = {
[0 ... ESR_ELx_EC_MAX] = NULL,
[ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32,
--
2.45.0.215.g3402c0e53f-goog
* [PATCH v2 4/7] KVM: arm64: Store the maximum sve vector length at hyp
2024-05-21 16:37 [PATCH v2 0/7] KVM: arm64: Fix handling of host fpsimd/sve state in protected mode Fuad Tabba
` (2 preceding siblings ...)
2024-05-21 16:37 ` [PATCH v2 3/7] KVM: arm64: Specialize handling of host fpsimd state on trap Fuad Tabba
@ 2024-05-21 16:37 ` Fuad Tabba
2024-05-21 21:21 ` Marc Zyngier
2024-05-21 16:37 ` [PATCH v2 5/7] KVM: arm64: Allocate memory at hyp for host sve state in pKVM Fuad Tabba
` (2 subsequent siblings)
6 siblings, 1 reply; 21+ messages in thread
From: Fuad Tabba @ 2024-05-21 16:37 UTC (permalink / raw)
To: kvmarm, linux-arm-kernel
Cc: maz, will, qperret, tabba, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
In subsequent patches, hyp needs to know the maximum sve vector
length for the host, without needing to trust the host for that.
This is used when allocating memory for the host sve state in the
following patch, as well as for allocating and restricting guest
sve state in a future patch series.
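Concretely, the hyp-side copy is what the next patch sizes the host sve
buffers with, along the lines of (taken from the following patch):
	static inline size_t pkvm_host_sve_state_size(void)
	{
		if (!system_supports_sve())
			return 0;
		return size_add(sizeof(struct user_sve_state),
				SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
	}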
Signed-off-by: Fuad Tabba <tabba@google.com>
---
arch/arm64/include/asm/kvm_host.h | 1 +
arch/arm64/include/asm/kvm_hyp.h | 1 +
arch/arm64/kvm/arm.c | 1 +
arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 ++
arch/arm64/kvm/reset.c | 2 ++
5 files changed, 7 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8170c04fde91..0a5fceb20f3a 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -76,6 +76,7 @@ static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
extern unsigned int __ro_after_init kvm_sve_max_vl;
+extern unsigned int __ro_after_init kvm_host_sve_max_vl;
int __init kvm_arm_init_sve(void);
u32 __attribute_const__ kvm_target_cpu(void);
diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index 2ab23589339a..d313adf53bef 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -143,5 +143,6 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
extern unsigned long kvm_nvhe_sym(__icache_flags);
extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
+extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
#endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9996a989b52e..9e565ea3d645 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2378,6 +2378,7 @@ static void kvm_hyp_init_symbols(void)
kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1);
kvm_nvhe_sym(__icache_flags) = __icache_flags;
kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
+ kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl;
}
static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 16aa4875ddb8..25e9a94f6d76 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -18,6 +18,8 @@ unsigned long __icache_flags;
/* Used by kvm_get_vttbr(). */
unsigned int kvm_arm_vmid_bits;
+unsigned int kvm_host_sve_max_vl;
+
/*
* Set trap register values based on features in ID_AA64PFR0.
*/
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 1b7b58cb121f..e818727900ec 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -46,11 +46,13 @@ static u32 __ro_after_init kvm_ipa_limit;
PSR_AA32_I_BIT | PSR_AA32_F_BIT)
unsigned int __ro_after_init kvm_sve_max_vl;
+unsigned int __ro_after_init kvm_host_sve_max_vl;
int __init kvm_arm_init_sve(void)
{
if (system_supports_sve()) {
kvm_sve_max_vl = sve_max_virtualisable_vl();
+ kvm_host_sve_max_vl = sve_max_vl();
/*
* The get_sve_reg()/set_sve_reg() ioctl interface will need
--
2.45.0.215.g3402c0e53f-goog
* Re: [PATCH v2 4/7] KVM: arm64: Store the maximum sve vector length at hyp
2024-05-21 16:37 ` [PATCH v2 4/7] KVM: arm64: Store the maximum sve vector length at hyp Fuad Tabba
@ 2024-05-21 21:21 ` Marc Zyngier
2024-05-22 14:36 ` Fuad Tabba
0 siblings, 1 reply; 21+ messages in thread
From: Marc Zyngier @ 2024-05-21 21:21 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
On Tue, 21 May 2024 17:37:17 +0100,
Fuad Tabba <tabba@google.com> wrote:
>
> In subsequent patches, hyp needs to know the maximum sve vector
> length for the host, without needing to trust the host for that.
> This is used when allocating memory for the host sve state in the
> following patch, as well as for allocating and restricting guest
> sve state in a future patch series.
>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
> arch/arm64/include/asm/kvm_host.h | 1 +
> arch/arm64/include/asm/kvm_hyp.h | 1 +
> arch/arm64/kvm/arm.c | 1 +
> arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 ++
> arch/arm64/kvm/reset.c | 2 ++
> 5 files changed, 7 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 8170c04fde91..0a5fceb20f3a 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -76,6 +76,7 @@ static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
> DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
>
> extern unsigned int __ro_after_init kvm_sve_max_vl;
> +extern unsigned int __ro_after_init kvm_host_sve_max_vl;
> int __init kvm_arm_init_sve(void);
>
> u32 __attribute_const__ kvm_target_cpu(void);
> diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> index 2ab23589339a..d313adf53bef 100644
> --- a/arch/arm64/include/asm/kvm_hyp.h
> +++ b/arch/arm64/include/asm/kvm_hyp.h
> @@ -143,5 +143,6 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
>
> extern unsigned long kvm_nvhe_sym(__icache_flags);
> extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
> +extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
>
> #endif /* __ARM64_KVM_HYP_H__ */
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 9996a989b52e..9e565ea3d645 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -2378,6 +2378,7 @@ static void kvm_hyp_init_symbols(void)
> kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1);
> kvm_nvhe_sym(__icache_flags) = __icache_flags;
> kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
> + kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl;
> }
>
> static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
> diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> index 16aa4875ddb8..25e9a94f6d76 100644
> --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
> +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> @@ -18,6 +18,8 @@ unsigned long __icache_flags;
> /* Used by kvm_get_vttbr(). */
> unsigned int kvm_arm_vmid_bits;
>
> +unsigned int kvm_host_sve_max_vl;
> +
> /*
> * Set trap register values based on features in ID_AA64PFR0.
> */
> diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> index 1b7b58cb121f..e818727900ec 100644
> --- a/arch/arm64/kvm/reset.c
> +++ b/arch/arm64/kvm/reset.c
> @@ -46,11 +46,13 @@ static u32 __ro_after_init kvm_ipa_limit;
> PSR_AA32_I_BIT | PSR_AA32_F_BIT)
>
> unsigned int __ro_after_init kvm_sve_max_vl;
> +unsigned int __ro_after_init kvm_host_sve_max_vl;
>
> int __init kvm_arm_init_sve(void)
> {
> if (system_supports_sve()) {
> kvm_sve_max_vl = sve_max_virtualisable_vl();
> + kvm_host_sve_max_vl = sve_max_vl();
Why the intermediate storage?
Setting kvm_nvhe_sym(kvm_host_sve_max_vl) to sve_max_vl() should be
enough. But it doesn't mean that all the CPUs support the same max VL,
and I wonder whether we end up with the wrong in-memory layout in this
case...
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
* Re: [PATCH v2 4/7] KVM: arm64: Store the maximum sve vector length at hyp
2024-05-21 21:21 ` Marc Zyngier
@ 2024-05-22 14:36 ` Fuad Tabba
0 siblings, 0 replies; 21+ messages in thread
From: Fuad Tabba @ 2024-05-22 14:36 UTC (permalink / raw)
To: Marc Zyngier
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
Hi Marc,
On Tue, May 21, 2024 at 10:21 PM Marc Zyngier <maz@kernel.org> wrote:
>
> On Tue, 21 May 2024 17:37:17 +0100,
> Fuad Tabba <tabba@google.com> wrote:
> >
> > In subsequent patches, hyp needs to know the maximum sve vector
> > length for the host, without needing to trust the host for that.
> > This is used when allocating memory for the host sve state in the
> > following patch, as well as for allocating and restricting guest
> > sve state in a future patch series.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > arch/arm64/include/asm/kvm_host.h | 1 +
> > arch/arm64/include/asm/kvm_hyp.h | 1 +
> > arch/arm64/kvm/arm.c | 1 +
> > arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 ++
> > arch/arm64/kvm/reset.c | 2 ++
> > 5 files changed, 7 insertions(+)
> >
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 8170c04fde91..0a5fceb20f3a 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -76,6 +76,7 @@ static inline enum kvm_mode kvm_get_mode(void) { return KVM_MODE_NONE; };
> > DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use);
> >
> > extern unsigned int __ro_after_init kvm_sve_max_vl;
> > +extern unsigned int __ro_after_init kvm_host_sve_max_vl;
> > int __init kvm_arm_init_sve(void);
> >
> > u32 __attribute_const__ kvm_target_cpu(void);
> > diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> > index 2ab23589339a..d313adf53bef 100644
> > --- a/arch/arm64/include/asm/kvm_hyp.h
> > +++ b/arch/arm64/include/asm/kvm_hyp.h
> > @@ -143,5 +143,6 @@ extern u64 kvm_nvhe_sym(id_aa64smfr0_el1_sys_val);
> >
> > extern unsigned long kvm_nvhe_sym(__icache_flags);
> > extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
> > +extern unsigned int kvm_nvhe_sym(kvm_host_sve_max_vl);
> >
> > #endif /* __ARM64_KVM_HYP_H__ */
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index 9996a989b52e..9e565ea3d645 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -2378,6 +2378,7 @@ static void kvm_hyp_init_symbols(void)
> > kvm_nvhe_sym(id_aa64smfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64SMFR0_EL1);
> > kvm_nvhe_sym(__icache_flags) = __icache_flags;
> > kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
> > + kvm_nvhe_sym(kvm_host_sve_max_vl) = kvm_host_sve_max_vl;
> > }
> >
> > static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
> > diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > index 16aa4875ddb8..25e9a94f6d76 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > @@ -18,6 +18,8 @@ unsigned long __icache_flags;
> > /* Used by kvm_get_vttbr(). */
> > unsigned int kvm_arm_vmid_bits;
> >
> > +unsigned int kvm_host_sve_max_vl;
> > +
> > /*
> > * Set trap register values based on features in ID_AA64PFR0.
> > */
> > diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
> > index 1b7b58cb121f..e818727900ec 100644
> > --- a/arch/arm64/kvm/reset.c
> > +++ b/arch/arm64/kvm/reset.c
> > @@ -46,11 +46,13 @@ static u32 __ro_after_init kvm_ipa_limit;
> > PSR_AA32_I_BIT | PSR_AA32_F_BIT)
> >
> > unsigned int __ro_after_init kvm_sve_max_vl;
> > +unsigned int __ro_after_init kvm_host_sve_max_vl;
> >
> > int __init kvm_arm_init_sve(void)
> > {
> > if (system_supports_sve()) {
> > kvm_sve_max_vl = sve_max_virtualisable_vl();
> > + kvm_host_sve_max_vl = sve_max_vl();
>
> Why the intermediate storage?
It's not needed for this patch, but rather for the one that allocates
the memory for the host sve state, to be able to reuse the same
functions at el1 and hyp, e.g., pkvm_host_sve_state_size(). I'll move
it to where it's used or find a way of getting rid of it altogether.
> Setting kvm_nvhe_sym(kvm_host_sve_max_vl) to sve_max_vl() should be
> enough. But it doesn't mean that all the CPUs support the same max VL,
> and I wonder whether we end-up with the wrong in-memory layout in this
> case...
Looking at how the value behind sve_max_vl() is calculated
(vl_info[ARM64_VEC_SVE].max_vl): it seems to store the maximum VL
common to all CPUs, which in turn becomes a limit to the usable VLs in
the system as a whole.
So for example PR_SVE_SET_VL (sve_set_current_vl()) eventually comes
down to find_supported_vector_length(), which limits the VL set to the
lengths supported in vl_info[ARM64_VEC_SVE], which are system-wide
rather than per cpu.
So if my understanding is correct, I don't think we need to allocate
more memory than sve_max_vl() to store the host state. It shouldn't be
a problem in terms of layout either, as long as we consistently use it
for save and restore. Similar to how kvm_arch_vcpu_put_fp() always
uses the vcpu_sve_max_vq(). It does need to be documented, as you
mention in a later patch.
Thanks,
/fuad
>
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
* [PATCH v2 5/7] KVM: arm64: Allocate memory at hyp for host sve state in pKVM
2024-05-21 16:37 [PATCH v2 0/7] KVM: arm64: Fix handling of host fpsimd/sve state in protected mode Fuad Tabba
` (3 preceding siblings ...)
2024-05-21 16:37 ` [PATCH v2 4/7] KVM: arm64: Store the maximum sve vector length at hyp Fuad Tabba
@ 2024-05-21 16:37 ` Fuad Tabba
2024-05-21 21:44 ` Marc Zyngier
2024-05-21 16:37 ` [PATCH v2 6/7] KVM: arm64: Eagerly restore host fpsimd/sve " Fuad Tabba
2024-05-21 16:37 ` [PATCH v2 7/7] KVM: arm64: Consolidate initializing the host data's fpsimd_state/sve " Fuad Tabba
6 siblings, 1 reply; 21+ messages in thread
From: Fuad Tabba @ 2024-05-21 16:37 UTC (permalink / raw)
To: kvmarm, linux-arm-kernel
Cc: maz, will, qperret, tabba, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
Protected mode needs to maintain (save/restore) the host's sve
state, rather than relying on the host kernel to do that. This is
to avoid leaking information to the host about guests and the
type of operations they are performing.
As a first step towards that, allocate memory at hyp, per cpu, to
hold the host sve data. The following patch will use this memory
to save/restore the host state.
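For a sense of scale (back-of-the-envelope, assuming 4K pages and a
host maximum VL of 2048 bits, i.e. VQ = 16):
	/*
	 * SVE_SIG_REGS_SIZE(16) = 32 Z-regs * 256 bytes
	 *                       + 16 P-regs *  32 bytes
	 *                       +  1 FFR    *  32 bytes = 8736 bytes
	 * plus the small user_sve_state header, so each per-CPU allocation
	 * below ends up being an order-2 (16K) block.
	 */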
Signed-off-by: Fuad Tabba <tabba@google.com>
---
Note that the last patch in this series will consolidate the
setup of the host's fpsimd and sve states, which currently takes
place in two different locations. Moreover, that last patch will
also place the host fpsimd and sve_state pointers in a union.
---
arch/arm64/include/asm/kvm_host.h | 2 +
arch/arm64/include/asm/kvm_pkvm.h | 9 ++++
arch/arm64/include/uapi/asm/ptrace.h | 14 ++++++
arch/arm64/kvm/arm.c | 68 ++++++++++++++++++++++++++++
arch/arm64/kvm/hyp/nvhe/setup.c | 24 ++++++++++
5 files changed, 117 insertions(+)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 0a5fceb20f3a..7b3745ef1d73 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -535,7 +535,9 @@ struct kvm_cpu_context {
*/
struct kvm_host_data {
struct kvm_cpu_context host_ctxt;
+
struct user_fpsimd_state *fpsimd_state; /* hyp VA */
+ struct user_sve_state *sve_state; /* hyp VA */
/* Ownership of the FP regs */
enum {
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index ad9cfb5c1ff4..b9d12e123efb 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -128,4 +128,13 @@ static inline unsigned long hyp_ffa_proxy_pages(void)
return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
}
+static inline size_t pkvm_host_sve_state_size(void)
+{
+ if (!system_supports_sve())
+ return 0;
+
+ return size_add(sizeof(struct user_sve_state),
+ SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
+}
+
#endif /* __ARM64_KVM_PKVM_H__ */
diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
index 7fa2f7036aa7..77aabf964071 100644
--- a/arch/arm64/include/uapi/asm/ptrace.h
+++ b/arch/arm64/include/uapi/asm/ptrace.h
@@ -120,6 +120,20 @@ struct user_sve_header {
__u16 __reserved;
};
+struct user_sve_state {
+ __u64 zcr_el1;
+
+ /*
+ * Ordering is important since __sve_save_state/__sve_restore_state
+ * relies on it.
+ */
+ __u32 fpsr;
+ __u32 fpcr;
+
+ /* Must be SVE_VQ_BYTES (128 bit) aligned. */
+ __u8 sve_regs[];
+};
+
/* Definitions for user_sve_header.flags: */
#define SVE_PT_REGS_MASK (1 << 0)
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 9e565ea3d645..a9b1b0e9c319 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1931,6 +1931,11 @@ static unsigned long nvhe_percpu_order(void)
return size ? get_order(size) : 0;
}
+static size_t pkvm_host_sve_state_order(void)
+{
+ return get_order(pkvm_host_sve_state_size());
+}
+
/* A lookup table holding the hypervisor VA for each vector slot */
static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
@@ -2316,7 +2321,15 @@ static void __init teardown_hyp_mode(void)
for_each_possible_cpu(cpu) {
free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
+
+ if (system_supports_sve() && is_protected_kvm_enabled()) {
+ struct user_sve_state *sve_state;
+
+ sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+ free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
+ }
}
+
}
static int __init do_pkvm_init(u32 hyp_va_bits)
@@ -2399,6 +2412,50 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
return 0;
}
+static int init_pkvm_host_sve_state(void)
+{
+ int cpu;
+
+ if (!system_supports_sve())
+ return 0;
+
+ /* Allocate pages for host sve state in protected mode. */
+ for_each_possible_cpu(cpu) {
+ struct page *page = alloc_pages(GFP_KERNEL, pkvm_host_sve_state_order());
+
+ if (!page)
+ return -ENOMEM;
+
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = page_address(page);
+ }
+
+ /*
+ * Don't map the pages in hyp since these are only used in protected
+ * mode, which will (re)create its own mapping when initialized.
+ */
+
+ return 0;
+}
+
+/*
+ * Finalizes the initialization of hyp mode, once everything else is initialized
+ * and the initialization process cannot fail.
+ */
+static void finalize_init_hyp_mode(void)
+{
+ int cpu;
+
+ if (!is_protected_kvm_enabled() || !system_supports_sve())
+ return;
+
+ for_each_possible_cpu(cpu) {
+ struct user_sve_state *sve_state;
+
+ sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = kern_hyp_va(sve_state);
+ }
+}
+
static void pkvm_hyp_init_ptrauth(void)
{
struct kvm_cpu_context *hyp_ctxt;
@@ -2567,6 +2624,10 @@ static int __init init_hyp_mode(void)
goto out_err;
}
+ err = init_pkvm_host_sve_state();
+ if (err)
+ goto out_err;
+
err = kvm_hyp_init_protection(hyp_va_bits);
if (err) {
kvm_err("Failed to init hyp memory protection\n");
@@ -2731,6 +2792,13 @@ static __init int kvm_arm_init(void)
if (err)
goto out_subs;
+ /*
+ * This should be called after initialization is done and failure isn't
+ * possible anymore.
+ */
+ if (!in_hyp_mode)
+ finalize_init_hyp_mode();
+
kvm_arm_initialised = true;
return 0;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 859f22f754d3..5c8cd806efb9 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -67,6 +67,28 @@ static int divide_memory_pool(void *virt, unsigned long size)
return 0;
}
+static int pkvm_create_host_sve_mappings(void)
+{
+ void *start, *end;
+ int ret, i;
+
+ if (!system_supports_sve())
+ return 0;
+
+ for (i = 0; i < hyp_nr_cpus; i++) {
+ struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
+ struct user_sve_state *sve_state = host_data->sve_state;
+
+ start = kern_hyp_va(sve_state);
+ end = start + PAGE_ALIGN(pkvm_host_sve_state_size());
+ ret = pkvm_create_mappings(start, end, PAGE_HYP);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
unsigned long *per_cpu_base,
u32 hyp_va_bits)
@@ -125,6 +147,8 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
return ret;
}
+ pkvm_create_host_sve_mappings();
+
/*
* Map the host sections RO in the hypervisor, but transfer the
* ownership from the host to the hypervisor itself to make sure they
--
2.45.0.215.g3402c0e53f-goog
* Re: [PATCH v2 5/7] KVM: arm64: Allocate memory at hyp for host sve state in pKVM
2024-05-21 16:37 ` [PATCH v2 5/7] KVM: arm64: Allocate memory at hyp for host sve state in pKVM Fuad Tabba
@ 2024-05-21 21:44 ` Marc Zyngier
2024-05-22 14:37 ` Fuad Tabba
0 siblings, 1 reply; 21+ messages in thread
From: Marc Zyngier @ 2024-05-21 21:44 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
On Tue, 21 May 2024 17:37:18 +0100,
Fuad Tabba <tabba@google.com> wrote:
>
> Protected mode needs to maintain (save/restore) the host's sve
> state, rather than relying on the host kernel to do that. This is
> to avoid leaking information to the host about guests and the
> type of operations they are performing.
>
> As a first step towards that, allocate memory at hyp, per cpu, to
> hold the host sve data. The following patch will use this memory
> to save/restore the host state.
What I read in the code contradicts this statement. The memory is
definitely allocated on the *host*.
>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
> Note that the last patch in this series will consolidate the
> setup of the host's fpsimd and sve states, which currently take
> place in two different locations. Moreover, that last patch will
> also place the host fpsimd and sve_state pointers in a union.
> ---
> arch/arm64/include/asm/kvm_host.h | 2 +
> arch/arm64/include/asm/kvm_pkvm.h | 9 ++++
> arch/arm64/include/uapi/asm/ptrace.h | 14 ++++++
> arch/arm64/kvm/arm.c | 68 ++++++++++++++++++++++++++++
> arch/arm64/kvm/hyp/nvhe/setup.c | 24 ++++++++++
> 5 files changed, 117 insertions(+)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 0a5fceb20f3a..7b3745ef1d73 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -535,7 +535,9 @@ struct kvm_cpu_context {
> */
> struct kvm_host_data {
> struct kvm_cpu_context host_ctxt;
> +
> struct user_fpsimd_state *fpsimd_state; /* hyp VA */
> + struct user_sve_state *sve_state; /* hyp VA */
>
> /* Ownership of the FP regs */
> enum {
> diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
> index ad9cfb5c1ff4..b9d12e123efb 100644
> --- a/arch/arm64/include/asm/kvm_pkvm.h
> +++ b/arch/arm64/include/asm/kvm_pkvm.h
> @@ -128,4 +128,13 @@ static inline unsigned long hyp_ffa_proxy_pages(void)
> return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
> }
>
> +static inline size_t pkvm_host_sve_state_size(void)
> +{
> + if (!system_supports_sve())
> + return 0;
> +
> + return size_add(sizeof(struct user_sve_state),
> + SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
> +}
> +
> #endif /* __ARM64_KVM_PKVM_H__ */
> diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
> index 7fa2f7036aa7..77aabf964071 100644
> --- a/arch/arm64/include/uapi/asm/ptrace.h
> +++ b/arch/arm64/include/uapi/asm/ptrace.h
> @@ -120,6 +120,20 @@ struct user_sve_header {
> __u16 __reserved;
> };
>
> +struct user_sve_state {
> + __u64 zcr_el1;
> +
> + /*
> + * Ordering is important since __sve_save_state/__sve_restore_state
> + * relies on it.
> + */
> + __u32 fpsr;
> + __u32 fpcr;
> +
> + /* Must be SVE_VQ_BYTES (128 bit) aligned. */
> + __u8 sve_regs[];
> +};
> +
Huh, why is this in uapi? Why should userspace even care about this at
all? From what I can tell, this is purely internal to the kernel, and
in any case, KVM isn't tied to that format if it doesn't dump stuff in
the userspace thread context. Given the number of bugs this format has
generated, I really wouldn't mind moving away from it.
> /* Definitions for user_sve_header.flags: */
> #define SVE_PT_REGS_MASK (1 << 0)
>
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index 9e565ea3d645..a9b1b0e9c319 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -1931,6 +1931,11 @@ static unsigned long nvhe_percpu_order(void)
> return size ? get_order(size) : 0;
> }
>
> +static size_t pkvm_host_sve_state_order(void)
> +{
> + return get_order(pkvm_host_sve_state_size());
> +}
> +
> /* A lookup table holding the hypervisor VA for each vector slot */
> static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
>
> @@ -2316,7 +2321,15 @@ static void __init teardown_hyp_mode(void)
> for_each_possible_cpu(cpu) {
> free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
> free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
> +
> + if (system_supports_sve() && is_protected_kvm_enabled()) {
> + struct user_sve_state *sve_state;
> +
> + sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
> + free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
> + }
> }
> +
> }
>
> static int __init do_pkvm_init(u32 hyp_va_bits)
> @@ -2399,6 +2412,50 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
> return 0;
> }
>
> +static int init_pkvm_host_sve_state(void)
> +{
> + int cpu;
> +
> + if (!system_supports_sve())
> + return 0;
> +
> + /* Allocate pages for host sve state in protected mode. */
> + for_each_possible_cpu(cpu) {
> + struct page *page = alloc_pages(GFP_KERNEL, pkvm_host_sve_state_order());
> +
See? That's definitely not a HYP allocation.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
* Re: [PATCH v2 5/7] KVM: arm64: Allocate memory at hyp for host sve state in pKVM
2024-05-21 21:44 ` Marc Zyngier
@ 2024-05-22 14:37 ` Fuad Tabba
2024-05-28 8:16 ` Marc Zyngier
0 siblings, 1 reply; 21+ messages in thread
From: Fuad Tabba @ 2024-05-22 14:37 UTC (permalink / raw)
To: Marc Zyngier
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
Hi Marc,
On Tue, May 21, 2024 at 10:44 PM Marc Zyngier <maz@kernel.org> wrote:
>
> On Tue, 21 May 2024 17:37:18 +0100,
> Fuad Tabba <tabba@google.com> wrote:
> >
> > Protected mode needs to maintain (save/restore) the host's sve
> > state, rather than relying on the host kernel to do that. This is
> > to avoid leaking information to the host about guests and the
> > type of operations they are performing.
> >
> > As a first step towards that, allocate memory at hyp, per cpu, to
> > hold the host sve data. The following patch will use this memory
> > to save/restore the host state.
>
> What I read in the code contradicts this statement. The memory is
> definitely allocated on the *host*.
You're right. I should say memory mapped at hyp.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > Note that the last patch in this series will consolidate the
> > setup of the host's fpsimd and sve states, which currently take
> > place in two different locations. Moreover, that last patch will
> > also place the host fpsimd and sve_state pointers in a union.
> > ---
> > arch/arm64/include/asm/kvm_host.h | 2 +
> > arch/arm64/include/asm/kvm_pkvm.h | 9 ++++
> > arch/arm64/include/uapi/asm/ptrace.h | 14 ++++++
> > arch/arm64/kvm/arm.c | 68 ++++++++++++++++++++++++++++
> > arch/arm64/kvm/hyp/nvhe/setup.c | 24 ++++++++++
> > 5 files changed, 117 insertions(+)
> >
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 0a5fceb20f3a..7b3745ef1d73 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -535,7 +535,9 @@ struct kvm_cpu_context {
> > */
> > struct kvm_host_data {
> > struct kvm_cpu_context host_ctxt;
> > +
> > struct user_fpsimd_state *fpsimd_state; /* hyp VA */
> > + struct user_sve_state *sve_state; /* hyp VA */
> >
> > /* Ownership of the FP regs */
> > enum {
> > diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
> > index ad9cfb5c1ff4..b9d12e123efb 100644
> > --- a/arch/arm64/include/asm/kvm_pkvm.h
> > +++ b/arch/arm64/include/asm/kvm_pkvm.h
> > @@ -128,4 +128,13 @@ static inline unsigned long hyp_ffa_proxy_pages(void)
> > return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
> > }
> >
> > +static inline size_t pkvm_host_sve_state_size(void)
> > +{
> > + if (!system_supports_sve())
> > + return 0;
> > +
> > + return size_add(sizeof(struct user_sve_state),
> > + SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
> > +}
> > +
> > #endif /* __ARM64_KVM_PKVM_H__ */
> > diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
> > index 7fa2f7036aa7..77aabf964071 100644
> > --- a/arch/arm64/include/uapi/asm/ptrace.h
> > +++ b/arch/arm64/include/uapi/asm/ptrace.h
> > @@ -120,6 +120,20 @@ struct user_sve_header {
> > __u16 __reserved;
> > };
> >
> > +struct user_sve_state {
> > + __u64 zcr_el1;
> > +
> > + /*
> > + * Ordering is important since __sve_save_state/__sve_restore_state
> > + * relies on it.
> > + */
> > + __u32 fpsr;
> > + __u32 fpcr;
> > +
> > + /* Must be SVE_VQ_BYTES (128 bit) aligned. */
> > + __u8 sve_regs[];
> > +};
> > +
>
> Huh, why is this in uapi? Why should userspace even care about this at
> all? From what I can tell, this is purely internal to the kernel, and
> in any case, KVM isn't tied to that format if it doesn't dump stuff in
> the userspace thread context. Given the number of bugs this format has
> generated, I really wouldn't mind moving away from it.
I put it here since the sve_header is here, as well as other similar structures.
Should I move it to asm/kvm_pkvm.h?
> > /* Definitions for user_sve_header.flags: */
> > #define SVE_PT_REGS_MASK (1 << 0)
> >
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index 9e565ea3d645..a9b1b0e9c319 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -1931,6 +1931,11 @@ static unsigned long nvhe_percpu_order(void)
> > return size ? get_order(size) : 0;
> > }
> >
> > +static size_t pkvm_host_sve_state_order(void)
> > +{
> > + return get_order(pkvm_host_sve_state_size());
> > +}
> > +
> > /* A lookup table holding the hypervisor VA for each vector slot */
> > static void *hyp_spectre_vector_selector[BP_HARDEN_EL2_SLOTS];
> >
> > @@ -2316,7 +2321,15 @@ static void __init teardown_hyp_mode(void)
> > for_each_possible_cpu(cpu) {
> > free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
> > free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
> > +
> > + if (system_supports_sve() && is_protected_kvm_enabled()) {
> > + struct user_sve_state *sve_state;
> > +
> > + sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
> > + free_pages((unsigned long) sve_state, pkvm_host_sve_state_order());
> > + }
> > }
> > +
> > }
> >
> > static int __init do_pkvm_init(u32 hyp_va_bits)
> > @@ -2399,6 +2412,50 @@ static int __init kvm_hyp_init_protection(u32 hyp_va_bits)
> > return 0;
> > }
> >
> > +static int init_pkvm_host_sve_state(void)
> > +{
> > + int cpu;
> > +
> > + if (!system_supports_sve())
> > + return 0;
> > +
> > + /* Allocate pages for host sve state in protected mode. */
> > + for_each_possible_cpu(cpu) {
> > + struct page *page = alloc_pages(GFP_KERNEL, pkvm_host_sve_state_order());
> > +
>
> See? That's definitely not a HYP allocation.
Nope :) I will fix the commit message and relevant comments in this patch.
Thanks,
/fuad
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
>
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2 5/7] KVM: arm64: Allocate memory at hyp for host sve state in pKVM
2024-05-22 14:37 ` Fuad Tabba
@ 2024-05-28 8:16 ` Marc Zyngier
0 siblings, 0 replies; 21+ messages in thread
From: Marc Zyngier @ 2024-05-28 8:16 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
On Wed, 22 May 2024 15:37:53 +0100,
Fuad Tabba <tabba@google.com> wrote:
>
> Hi Marc,
>
> On Tue, May 21, 2024 at 10:44 PM Marc Zyngier <maz@kernel.org> wrote:
> >
> > On Tue, 21 May 2024 17:37:18 +0100,
> > Fuad Tabba <tabba@google.com> wrote:
> > >
> > > Protected mode needs to maintain (save/restore) the host's sve
> > > state, rather than relying on the host kernel to do that. This is
> > > to avoid leaking information to the host about guests and the
> > > type of operations they are performing.
> > >
> > > As a first step towards that, allocate memory at hyp, per cpu, to
> > > hold the host sve data. The following patch will use this memory
> > > to save/restore the host state.
> >
> > What I read in the code contradicts this statement. The memory is
> > definitely allocated on the *host*.
>
> You're right. I should say memory mapped at hyp.
>
> > >
> > > Signed-off-by: Fuad Tabba <tabba@google.com>
> > > ---
> > > Note that the last patch in this series will consolidate the
> > > setup of the host's fpsimd and sve states, which currently take
> > > place in two different locations. Moreover, that last patch will
> > > also place the host fpsimd and sve_state pointers in a union.
> > > ---
> > > arch/arm64/include/asm/kvm_host.h | 2 +
> > > arch/arm64/include/asm/kvm_pkvm.h | 9 ++++
> > > arch/arm64/include/uapi/asm/ptrace.h | 14 ++++++
> > > arch/arm64/kvm/arm.c | 68 ++++++++++++++++++++++++++++
> > > arch/arm64/kvm/hyp/nvhe/setup.c | 24 ++++++++++
> > > 5 files changed, 117 insertions(+)
> > >
> > > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > > index 0a5fceb20f3a..7b3745ef1d73 100644
> > > --- a/arch/arm64/include/asm/kvm_host.h
> > > +++ b/arch/arm64/include/asm/kvm_host.h
> > > @@ -535,7 +535,9 @@ struct kvm_cpu_context {
> > > */
> > > struct kvm_host_data {
> > > struct kvm_cpu_context host_ctxt;
> > > +
> > > struct user_fpsimd_state *fpsimd_state; /* hyp VA */
> > > + struct user_sve_state *sve_state; /* hyp VA */
> > >
> > > /* Ownership of the FP regs */
> > > enum {
> > > diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
> > > index ad9cfb5c1ff4..b9d12e123efb 100644
> > > --- a/arch/arm64/include/asm/kvm_pkvm.h
> > > +++ b/arch/arm64/include/asm/kvm_pkvm.h
> > > @@ -128,4 +128,13 @@ static inline unsigned long hyp_ffa_proxy_pages(void)
> > > return (2 * KVM_FFA_MBOX_NR_PAGES) + DIV_ROUND_UP(desc_max, PAGE_SIZE);
> > > }
> > >
> > > +static inline size_t pkvm_host_sve_state_size(void)
> > > +{
> > > + if (!system_supports_sve())
> > > + return 0;
> > > +
> > > + return size_add(sizeof(struct user_sve_state),
> > > + SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
> > > +}
> > > +
> > > #endif /* __ARM64_KVM_PKVM_H__ */
> > > diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h
> > > index 7fa2f7036aa7..77aabf964071 100644
> > > --- a/arch/arm64/include/uapi/asm/ptrace.h
> > > +++ b/arch/arm64/include/uapi/asm/ptrace.h
> > > @@ -120,6 +120,20 @@ struct user_sve_header {
> > > __u16 __reserved;
> > > };
> > >
> > > +struct user_sve_state {
> > > + __u64 zcr_el1;
> > > +
> > > + /*
> > > + * Ordering is important since __sve_save_state/__sve_restore_state
> > > + * relies on it.
> > > + */
> > > + __u32 fpsr;
> > > + __u32 fpcr;
> > > +
> > > + /* Must be SVE_VQ_BYTES (128 bit) aligned. */
> > > + __u8 sve_regs[];
> > > +};
> > > +
> >
> > Huh, why is this in uapi? Why should userspace even care about this at
> > all? From what I can tell, this is purely internal to the kernel, and
> > in any case, KVM isn't tied to that format if it doesn't dump stuff in
> > the userspace thread context. Given the number of bugs this format has
> > generated, I really wouldn't mind moving away from it.
>
> I put it here since the sve_header is here, as well as other similar structures.
>
> Should I move it to asm/kvm_pkvm.h?
kvm_host.h or kvm_types.h seem like the best option (replacing the
"user" prefix with something else), as this is directly consumed by
kvm_host.h.
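For the sake of illustration, the non-uapi definition could be as simple
as something like this (the cpu_sve_state name is only a strawman, not
something settled in this thread):

struct cpu_sve_state {
	u64 zcr_el1;

	/*
	 * Ordering is important since __sve_save_state/__sve_restore_state
	 * relies on it.
	 */
	u32 fpsr;
	u32 fpcr;

	/* Must be SVE_VQ_BYTES (128 bit) aligned. */
	u8 sve_regs[];
};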
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v2 6/7] KVM: arm64: Eagerly restore host fpsimd/sve state in pKVM
2024-05-21 16:37 [PATCH v2 0/7] KVM: arm64: Fix handling of host fpsimd/sve state in protected mode Fuad Tabba
` (4 preceding siblings ...)
2024-05-21 16:37 ` [PATCH v2 5/7] KVM: arm64: Allocate memory at hyp for host sve state in pKVM Fuad Tabba
@ 2024-05-21 16:37 ` Fuad Tabba
2024-05-21 22:52 ` Marc Zyngier
2024-05-21 16:37 ` [PATCH v2 7/7] KVM: arm64: Consolidate initializing the host data's fpsimd_state/sve " Fuad Tabba
6 siblings, 1 reply; 21+ messages in thread
From: Fuad Tabba @ 2024-05-21 16:37 UTC (permalink / raw)
To: kvmarm, linux-arm-kernel
Cc: maz, will, qperret, tabba, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
When running in protected mode we don't want to leak protected
guest state to the host, including whether a guest has used
fpsimd/sve. Therefore, eagerly restore the host state on guest
exit when running in protected mode, which happens only if the
guest has used fpsimd/sve.
As a future optimisation, we could go back to lazy restoring
state at the host after exiting non-protected guests.
Signed-off-by: Fuad Tabba <tabba@google.com>
---
arch/arm64/kvm/hyp/include/hyp/switch.h | 13 +++++-
arch/arm64/kvm/hyp/nvhe/hyp-main.c | 53 +++++++++++++++++++++++--
arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +
arch/arm64/kvm/hyp/nvhe/switch.c | 16 +++++++-
4 files changed, 79 insertions(+), 5 deletions(-)
diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
index 1897b73e635c..2fa29bfec0b6 100644
--- a/arch/arm64/kvm/hyp/include/hyp/switch.h
+++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
@@ -320,6 +320,16 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
}
+static inline void __hyp_sve_save_host(void)
+{
+ struct user_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
+ sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr);
+}
+
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
/*
@@ -356,7 +366,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
/* First disable enough traps to allow us to update the registers */
reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
- if (sve_guest)
+ if (sve_guest ||
+ (is_protected_kvm_enabled() && system_supports_sve()))
reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
cpacr_clear_set(0, reg);
isb();
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index b07d44484f42..f79f0f7b2759 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -23,20 +23,66 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
+static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
+{
+ __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
+ sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
+ __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr);
+ sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+}
+
+static void __hyp_sve_restore_host(void)
+{
+ struct user_sve_state *sve_state = *host_data_ptr(sve_state);
+
+ sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
+ __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
+ &sve_state->fpsr);
+ write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
+}
+
+static void fpsimd_sve_flush(void)
+{
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
+static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
+{
+ if (!guest_owns_fp_regs())
+ return;
+
+ cpacr_clear_set(0, (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN |
+ CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN));
+ isb();
+
+ if (vcpu_has_sve(vcpu))
+ __hyp_sve_save_guest(vcpu);
+ else
+ __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
+
+ if (system_supports_sve())
+ __hyp_sve_restore_host();
+ else
+ __fpsimd_restore_state(*host_data_ptr(fpsimd_state));
+
+ *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
+}
+
static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
{
struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+ fpsimd_sve_flush();
+
hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
- hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl;
+ hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2;
hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
- hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
@@ -54,10 +100,11 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
unsigned int i;
+ fpsimd_sve_sync(&hyp_vcpu->vcpu);
+
host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2;
- host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2;
host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 25e9a94f6d76..feb27b4ce459 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -588,6 +588,8 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
if (ret)
unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
+ hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
+
return ret;
}
diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
index 019f863922fa..5bf437682b94 100644
--- a/arch/arm64/kvm/hyp/nvhe/switch.c
+++ b/arch/arm64/kvm/hyp/nvhe/switch.c
@@ -184,7 +184,21 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
{
- __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+ /*
+ * Non-protected kvm relies on the host restoring its sve state.
+ * Protected kvm restores the host's sve state so as not to reveal that
+ * fpsimd was used by a guest, nor to leak upper sve bits.
+ */
+ if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
+ __hyp_sve_save_host();
+
+ /* Re-enable SVE traps if not supported for the guest vcpu. */
+ if (!vcpu_has_sve(vcpu))
+ cpacr_clear_set(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN, 0);
+
+ } else {
+ __fpsimd_save_state(*host_data_ptr(fpsimd_state));
+ }
}
static const exit_handler_fn hyp_exit_handlers[] = {
--
2.45.0.215.g3402c0e53f-goog
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v2 6/7] KVM: arm64: Eagerly restore host fpsimd/sve state in pKVM
2024-05-21 16:37 ` [PATCH v2 6/7] KVM: arm64: Eagerly restore host fpsimd/sve " Fuad Tabba
@ 2024-05-21 22:52 ` Marc Zyngier
2024-05-22 14:48 ` Fuad Tabba
0 siblings, 1 reply; 21+ messages in thread
From: Marc Zyngier @ 2024-05-21 22:52 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
On Tue, 21 May 2024 17:37:19 +0100,
Fuad Tabba <tabba@google.com> wrote:
>
> When running in protected mode we don't want to leak protected
> guest state to the host, including whether a guest has used
> fpsimd/sve. Therefore, eagerly restore the host state on guest
> exit when running in protected mode, which happens only if the
> guest has used fpsimd/sve.
>
> As a future optimisation, we could go back to lazy restoring
> state at the host after exiting non-protected guests.
No. This sort of things is way too invasive and would require mapping
the VHE host data at EL2. If we need to optimise this crap, then we
apply the same techniques as we use for guests. If that's good enough
for the guests, that's good enough for the host.
>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
> arch/arm64/kvm/hyp/include/hyp/switch.h | 13 +++++-
> arch/arm64/kvm/hyp/nvhe/hyp-main.c | 53 +++++++++++++++++++++++--
> arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +
> arch/arm64/kvm/hyp/nvhe/switch.c | 16 +++++++-
> 4 files changed, 79 insertions(+), 5 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> index 1897b73e635c..2fa29bfec0b6 100644
> --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> @@ -320,6 +320,16 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
> write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
> }
>
> +static inline void __hyp_sve_save_host(void)
> +{
> + struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> +
> + sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
> + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> + __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
> + &sve_state->fpsr);
> +}
> +
> static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
>
> /*
> @@ -356,7 +366,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
>
> /* First disable enough traps to allow us to update the registers */
> reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
> - if (sve_guest)
> + if (sve_guest ||
> + (is_protected_kvm_enabled() && system_supports_sve()))
This looks really ugly. Why can't we just compute sve_guest to take
these conditions into account?
> reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
> cpacr_clear_set(0, reg);
> isb();
> diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> index b07d44484f42..f79f0f7b2759 100644
> --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> @@ -23,20 +23,66 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
>
> void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
>
> +static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
> +{
> + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
> + sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
> + __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr);
> + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> +}
> +
> +static void __hyp_sve_restore_host(void)
> +{
> + struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> +
> + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> + __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
This is what I was worried about in a previous patch.
kvm_host_sve_max_vl represents the max VL across all CPUs. if CPU0
supports 128bit SVE and CPU1 256bit, the value is 256. but on CPU0,
ZCR_ELx_LEN_MASK will results in using 128bit accesses, and the offset
will be wrong. I can't convince myself that anything really goes wrong
as long as we're consistent between save and restore, but that's at
best ugly and needs extensive documenting.
On top of that, a conditional update of ZCR_EL2 with ZCR_ELx_LEN_MASK
is unlikely to be beneficial, since nobody implements 2k vectors. A
direct write followed by an ISB is likely to be better.
> + &sve_state->fpsr);
> + write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
> +}
> +
> +static void fpsimd_sve_flush(void)
> +{
> + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
> +}
> +
> +static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
> +{
> + if (!guest_owns_fp_regs())
> + return;
> +
> + cpacr_clear_set(0, (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN |
> + CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN));
> + isb();
> +
> + if (vcpu_has_sve(vcpu))
> + __hyp_sve_save_guest(vcpu);
> + else
> + __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
> +
> + if (system_supports_sve())
> + __hyp_sve_restore_host();
> + else
> + __fpsimd_restore_state(*host_data_ptr(fpsimd_state));
> +
> + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
> +}
> +
> static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
> {
> struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
>
> + fpsimd_sve_flush();
> +
> hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
>
> hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
> - hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl;
> + hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
>
> hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
>
> hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2;
> hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
> - hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
>
> hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
>
> @@ -54,10 +100,11 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
> struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
> unsigned int i;
>
> + fpsimd_sve_sync(&hyp_vcpu->vcpu);
> +
> host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
>
> host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2;
> - host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2;
>
> host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> index 25e9a94f6d76..feb27b4ce459 100644
> --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
> +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> @@ -588,6 +588,8 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
> if (ret)
> unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
>
> + hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
> +
Eventually, we need to rename this to cpacr_el2 and make sure we
consistently use the VHE format everywhere. I'm starting to be worried
that we mix things badly.
> return ret;
> }
>
> diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> index 019f863922fa..5bf437682b94 100644
> --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> @@ -184,7 +184,21 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
>
> static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
> {
> - __fpsimd_save_state(*host_data_ptr(fpsimd_state));
> + /*
> + * Non-protected kvm relies on the host restoring its sve state.
> + * Protected kvm restores the host's sve state so as not to reveal that
> + * fpsimd was used by a guest, nor to leak upper sve bits.
> + */
> + if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
> + __hyp_sve_save_host();
> +
> + /* Re-enable SVE traps if not supported for the guest vcpu. */
> + if (!vcpu_has_sve(vcpu))
> + cpacr_clear_set(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN, 0);
> +
nit: spurious new line
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2 6/7] KVM: arm64: Eagerly restore host fpsimd/sve state in pKVM
2024-05-21 22:52 ` Marc Zyngier
@ 2024-05-22 14:48 ` Fuad Tabba
2024-05-28 8:21 ` Marc Zyngier
0 siblings, 1 reply; 21+ messages in thread
From: Fuad Tabba @ 2024-05-22 14:48 UTC (permalink / raw)
To: Marc Zyngier
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
Hi Marc,
On Tue, May 21, 2024 at 11:52 PM Marc Zyngier <maz@kernel.org> wrote:
>
> On Tue, 21 May 2024 17:37:19 +0100,
> Fuad Tabba <tabba@google.com> wrote:
> >
> > When running in protected mode we don't want to leak protected
> > guest state to the host, including whether a guest has used
> > fpsimd/sve. Therefore, eagerly restore the host state on guest
> > exit when running in protected mode, which happens only if the
> > guest has used fpsimd/sve.
> >
> > As a future optimisation, we could go back to lazy restoring
> > state at the host after exiting non-protected guests.
>
> No. This sort of things is way too invasive and would require mapping
> the VHE host data at EL2. If we need to optimise this crap, then we
> apply the same techniques as we use for guests. If that's good enough
> for the guests, that's good enough for the host.
:D
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > arch/arm64/kvm/hyp/include/hyp/switch.h | 13 +++++-
> > arch/arm64/kvm/hyp/nvhe/hyp-main.c | 53 +++++++++++++++++++++++--
> > arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +
> > arch/arm64/kvm/hyp/nvhe/switch.c | 16 +++++++-
> > 4 files changed, 79 insertions(+), 5 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> > index 1897b73e635c..2fa29bfec0b6 100644
> > --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> > +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> > @@ -320,6 +320,16 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
> > write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
> > }
> >
> > +static inline void __hyp_sve_save_host(void)
> > +{
> > + struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> > +
> > + sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
> > + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > + __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
> > + &sve_state->fpsr);
> > +}
> > +
> > static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
> >
> > /*
> > @@ -356,7 +366,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
> >
> > /* First disable enough traps to allow us to update the registers */
> > reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
> > - if (sve_guest)
> > + if (sve_guest ||
> > + (is_protected_kvm_enabled() && system_supports_sve()))
>
> This looks really ugly. Why can't we just compute sve_guest to take
> these conditions into account?
Because this is just for disabling the SVE traps, which we would need
to do even in the case where the guest doesn't support SVE in order to
be able to store the host sve state.
sve_guest is also used later in the function to determine whether we
need to restore the guest's sve state, and earlier to determine
whether an SVE trap from a non-sve guest should result in injecting an
undef instruction.
That said, maybe the following is a bit less ugly?
if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
cpacr_clear_set(0, CPACR_ELx_FPEN|CPACR_ELx_ZEN);
else
cpacr_clear_set(0, CPACR_ELx_FPEN);
>
> > reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
> > cpacr_clear_set(0, reg);
> > isb();
> > diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > index b07d44484f42..f79f0f7b2759 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > @@ -23,20 +23,66 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
> >
> > void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
> >
> > +static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
> > +{
> > + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
> > + sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
> > + __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr);
> > + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > +}
> > +
> > +static void __hyp_sve_restore_host(void)
> > +{
> > + struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> > +
> > + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > + __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
>
> This is what I was worried about in a previous patch.
> kvm_host_sve_max_vl represents the max VL across all CPUs. if CPU0
> supports 128bit SVE and CPU1 256bit, the value is 256. but on CPU0,
> ZCR_ELx_LEN_MASK will results in using 128bit accesses, and the offset
> will be wrong. I can't convince myself that anything really goes wrong
> as long as we're consistent between save and restore, but that's at
> best ugly and needs extensive documenting.
As I mentioned in my reply to the previous patch, I _think_ that it
represents the maximum common VL across CPUs. Since the value comes
from vl_info.vq_map, which is calculated by filtering out all the VQs
not supported.
In kvm_arch_vcpu_put_fp(), we always assume the maximum VL supported
by the guest, regardless of which cpu this happens to be running on.
That said, there's a comment there outlining that. I'll add a comment
here to document it.
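Something along these lines, perhaps (wording is only a sketch):

	/*
	 * kvm_host_sve_max_vl is the largest VL supported by every physical
	 * CPU, so saving and restoring at that VL stays consistent regardless
	 * of which CPU the save or the restore ends up running on.
	 */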
> On top of that, a conditional update of ZCR_EL2 with ZCR_ELx_LEN_MASK
> is unlikely to be beneficial, since nobody implements 2k vectors. A
> direct write followed by an ISB is likely to be better.
Will do.
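For example, something like this in the save path (sketch only, reusing
the names already in this patch):

	write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
	isb();
	__sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
			 &sve_state->fpsr);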
> > + &sve_state->fpsr);
> > + write_sysreg_el1(sve_state->zcr_el1, SYS_ZCR);
> > +}
> > +
> > +static void fpsimd_sve_flush(void)
> > +{
> > + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
> > +}
> > +
> > +static void fpsimd_sve_sync(struct kvm_vcpu *vcpu)
> > +{
> > + if (!guest_owns_fp_regs())
> > + return;
> > +
> > + cpacr_clear_set(0, (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN |
> > + CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN));
> > + isb();
> > +
> > + if (vcpu_has_sve(vcpu))
> > + __hyp_sve_save_guest(vcpu);
> > + else
> > + __fpsimd_save_state(&vcpu->arch.ctxt.fp_regs);
> > +
> > + if (system_supports_sve())
> > + __hyp_sve_restore_host();
> > + else
> > + __fpsimd_restore_state(*host_data_ptr(fpsimd_state));
> > +
> > + *host_data_ptr(fp_owner) = FP_STATE_HOST_OWNED;
> > +}
> > +
> > static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
> > {
> > struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
> >
> > + fpsimd_sve_flush();
> > +
> > hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt;
> >
> > hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state);
> > - hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl;
> > + hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
> >
> > hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
> >
> > hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2;
> > hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
> > - hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2;
> >
> > hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags;
> >
> > @@ -54,10 +100,11 @@ static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
> > struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
> > unsigned int i;
> >
> > + fpsimd_sve_sync(&hyp_vcpu->vcpu);
> > +
> > host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt;
> >
> > host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2;
> > - host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2;
> >
> > host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault;
> >
> > diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > index 25e9a94f6d76..feb27b4ce459 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > @@ -588,6 +588,8 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
> > if (ret)
> > unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
> >
> > + hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
> > +
>
> Eventually, we need to rename this to cpacr_el2 and make sure we
> consistently use the VHE format everywhere. I'm starting to be worried
> that we mix things badly.
Would you like me to do that in this patch series, or should I submit
another one after this that does this cleanup?
> > return ret;
> > }
> >
> > diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c
> > index 019f863922fa..5bf437682b94 100644
> > --- a/arch/arm64/kvm/hyp/nvhe/switch.c
> > +++ b/arch/arm64/kvm/hyp/nvhe/switch.c
> > @@ -184,7 +184,21 @@ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code)
> >
> > static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
> > {
> > - __fpsimd_save_state(*host_data_ptr(fpsimd_state));
> > + /*
> > + * Non-protected kvm relies on the host restoring its sve state.
> > + * Protected kvm restores the host's sve state so as not to reveal that
> > + * fpsimd was used by a guest, nor to leak upper sve bits.
> > + */
> > + if (unlikely(is_protected_kvm_enabled() && system_supports_sve())) {
> > + __hyp_sve_save_host();
> > +
> > + /* Re-enable SVE traps if not supported for the guest vcpu. */
> > + if (!vcpu_has_sve(vcpu))
> > + cpacr_clear_set(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN, 0);
> > +
>
> nit: spurious new line
Ack.
Thank you!
/fuad
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2 6/7] KVM: arm64: Eagerly restore host fpsimd/sve state in pKVM
2024-05-22 14:48 ` Fuad Tabba
@ 2024-05-28 8:21 ` Marc Zyngier
0 siblings, 0 replies; 21+ messages in thread
From: Marc Zyngier @ 2024-05-28 8:21 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
On Wed, 22 May 2024 15:48:39 +0100,
Fuad Tabba <tabba@google.com> wrote:
>
> Hi Marc,
>
> On Tue, May 21, 2024 at 11:52 PM Marc Zyngier <maz@kernel.org> wrote:
> >
> > On Tue, 21 May 2024 17:37:19 +0100,
> > Fuad Tabba <tabba@google.com> wrote:
> > >
> > > When running in protected mode we don't want to leak protected
> > > guest state to the host, including whether a guest has used
> > > fpsimd/sve. Therefore, eagerly restore the host state on guest
> > > exit when running in protected mode, which happens only if the
> > > guest has used fpsimd/sve.
> > >
> > > As a future optimisation, we could go back to lazy restoring
> > > state at the host after exiting non-protected guests.
> >
> > No. This sort of things is way too invasive and would require mapping
> > the VHE host data at EL2. If we need to optimise this crap, then we
> > apply the same techniques as we use for guests. If that's good enough
> > for the guests, that's good enough for the host.
>
> :D
>
> > >
> > > Signed-off-by: Fuad Tabba <tabba@google.com>
> > > ---
> > > arch/arm64/kvm/hyp/include/hyp/switch.h | 13 +++++-
> > > arch/arm64/kvm/hyp/nvhe/hyp-main.c | 53 +++++++++++++++++++++++--
> > > arch/arm64/kvm/hyp/nvhe/pkvm.c | 2 +
> > > arch/arm64/kvm/hyp/nvhe/switch.c | 16 +++++++-
> > > 4 files changed, 79 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h
> > > index 1897b73e635c..2fa29bfec0b6 100644
> > > --- a/arch/arm64/kvm/hyp/include/hyp/switch.h
> > > +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h
> > > @@ -320,6 +320,16 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu)
> > > write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR);
> > > }
> > >
> > > +static inline void __hyp_sve_save_host(void)
> > > +{
> > > + struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> > > +
> > > + sve_state->zcr_el1 = read_sysreg_el1(SYS_ZCR);
> > > + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > > + __sve_save_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
> > > + &sve_state->fpsr);
> > > +}
> > > +
> > > static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu);
> > >
> > > /*
> > > @@ -356,7 +366,8 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code)
> > >
> > > /* First disable enough traps to allow us to update the registers */
> > > reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN;
> > > - if (sve_guest)
> > > + if (sve_guest ||
> > > + (is_protected_kvm_enabled() && system_supports_sve()))
> >
> > This looks really ugly. Why can't we just compute sve_guest to take
> > these conditions into account?
>
> Because this is just for disabling the SVE traps, which we would need
> to do even in the case where the guest doesn't support SVE in order to
> be able to store the host sve state.
>
> sve_guest is also used later in the function to determine whether we
> need to restore the guest's sve state, and earlier to determine
> whether an SVE trap from a non-sve guest should result in injecting an
> undef instruction.
>
> That said, maybe the following is a bit less ugly?
>
> if (sve_guest || (is_protected_kvm_enabled() && system_supports_sve()))
> cpacr_clear_set(0, CPACR_ELx_FPEN|CPACR_ELx_ZEN);
> else
> cpacr_clear_set(0, CPACR_ELx_FPEN);
Yes, this looks marginally better. Overall, this helper is in dire
need of a full rewrite...
> >
> > > reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN;
> > > cpacr_clear_set(0, reg);
> > > isb();
> > > diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > > index b07d44484f42..f79f0f7b2759 100644
> > > --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > > +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
> > > @@ -23,20 +23,66 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
> > >
> > > void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
> > >
> > > +static void __hyp_sve_save_guest(struct kvm_vcpu *vcpu)
> > > +{
> > > + __vcpu_sys_reg(vcpu, ZCR_EL1) = read_sysreg_el1(SYS_ZCR);
> > > + sve_cond_update_zcr_vq(vcpu_sve_max_vq(vcpu) - 1, SYS_ZCR_EL2);
> > > + __sve_save_state(vcpu_sve_pffr(vcpu), &vcpu->arch.ctxt.fp_regs.fpsr);
> > > + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > > +}
> > > +
> > > +static void __hyp_sve_restore_host(void)
> > > +{
> > > + struct user_sve_state *sve_state = *host_data_ptr(sve_state);
> > > +
> > > + sve_cond_update_zcr_vq(ZCR_ELx_LEN_MASK, SYS_ZCR_EL2);
> > > + __sve_restore_state(sve_state->sve_regs + sve_ffr_offset(kvm_host_sve_max_vl),
> >
> > This is what I was worried about in a previous patch.
> > kvm_host_sve_max_vl represents the max VL across all CPUs. if CPU0
> > supports 128bit SVE and CPU1 256bit, the value is 256. but on CPU0,
> > ZCR_ELx_LEN_MASK will results in using 128bit accesses, and the offset
> > will be wrong. I can't convince myself that anything really goes wrong
> > as long as we're consistent between save and restore, but that's at
> > best ugly and needs extensive documenting.
>
> As I mentioned in my reply to the previous patch, I _think_ that it
> represents the maximum common VL across CPUs. Since the value comes
> from vl_info.vq_map, which is calculated by filtering out all the VQs
> not supported.
>
> In kvm_arch_vcpu_put_fp(), we always assume the maximum VL supported
> by the guest, regardless of which cpu this happens to be running on.
> That said, there's a comment there outlining that. I'll add a comment
> here to document it.
That'd be helpful, thanks.
>
> > On top of that, a conditional update of ZCR_EL2 with ZCR_ELx_LEN_MASK
> > is unlikely to be beneficial, since nobody implements 2k vectors. A
> > direct write followed by an ISB is likely to be better.
>
> Will do.
[..]
> > > diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > > index 25e9a94f6d76..feb27b4ce459 100644
> > > --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > > +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
> > > @@ -588,6 +588,8 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
> > > if (ret)
> > > unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
> > >
> > > + hyp_vcpu->vcpu.arch.cptr_el2 = kvm_get_reset_cptr_el2(&hyp_vcpu->vcpu);
> > > +
> >
> > Eventually, we need to rename this to cpacr_el2 and make sure we
> > consistently use the VHE format everywhere. I'm starting to be worried
> > that we mix things badly.
>
> Would you like me to do that in this patch series, or should I submit
> another one after this that does this cleanup?
Later please. Let's focus on getting this series across first.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH v2 7/7] KVM: arm64: Consolidate initializing the host data's fpsimd_state/sve in pKVM
2024-05-21 16:37 [PATCH v2 0/7] KVM: arm64: Fix handling of host fpsimd/sve state in protected mode Fuad Tabba
` (5 preceding siblings ...)
2024-05-21 16:37 ` [PATCH v2 6/7] KVM: arm64: Eagerly restore host fpsimd/sve " Fuad Tabba
@ 2024-05-21 16:37 ` Fuad Tabba
2024-05-21 22:55 ` Marc Zyngier
6 siblings, 1 reply; 21+ messages in thread
From: Fuad Tabba @ 2024-05-21 16:37 UTC (permalink / raw)
To: kvmarm, linux-arm-kernel
Cc: maz, will, qperret, tabba, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
Now that we have introduced finalize_init_hyp_mode(), let's
consolidate the initialization of the host_data fpsimd_state and
sve state.
Signed-off-by: Fuad Tabba <tabba@google.com>
---
arch/arm64/include/asm/kvm_host.h | 10 ++++++++--
arch/arm64/kvm/arm.c | 18 ++++++++++++------
arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 1 -
arch/arm64/kvm/hyp/nvhe/pkvm.c | 11 -----------
arch/arm64/kvm/hyp/nvhe/setup.c | 1 -
5 files changed, 20 insertions(+), 21 deletions(-)
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7b3745ef1d73..8a170f314498 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -536,8 +536,14 @@ struct kvm_cpu_context {
struct kvm_host_data {
struct kvm_cpu_context host_ctxt;
- struct user_fpsimd_state *fpsimd_state; /* hyp VA */
- struct user_sve_state *sve_state; /* hyp VA */
+ /*
+ * All pointers in this union are hyp VA.
+ * sve_state is only used in pKVM and if system_supports_sve().
+ */
+ union {
+ struct user_fpsimd_state *fpsimd_state;
+ struct user_sve_state *sve_state;
+ };
/* Ownership of the FP regs */
enum {
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index a9b1b0e9c319..a1c7e0ad6951 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2445,14 +2445,20 @@ static void finalize_init_hyp_mode(void)
{
int cpu;
- if (!is_protected_kvm_enabled() || !system_supports_sve())
- return;
-
for_each_possible_cpu(cpu) {
- struct user_sve_state *sve_state;
+ if (system_supports_sve() && is_protected_kvm_enabled()) {
+ struct user_sve_state *sve_state;
- sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
- per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = kern_hyp_va(sve_state);
+ sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
+ kern_hyp_va(sve_state);
+ } else {
+ struct user_fpsimd_state *fpsimd_state;
+
+ fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
+ per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
+ kern_hyp_va(fpsimd_state);
+ }
}
}
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 22f374e9f532..24a9a8330d19 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -59,7 +59,6 @@ static inline bool pkvm_hyp_vcpu_is_protected(struct pkvm_hyp_vcpu *hyp_vcpu)
}
void pkvm_hyp_vm_table_init(void *tbl);
-void pkvm_host_fpsimd_state_init(void);
int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
unsigned long pgd_hva);
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index feb27b4ce459..ea67fcbf8376 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -249,17 +249,6 @@ void pkvm_hyp_vm_table_init(void *tbl)
vm_table = tbl;
}
-void pkvm_host_fpsimd_state_init(void)
-{
- unsigned long i;
-
- for (i = 0; i < hyp_nr_cpus; i++) {
- struct kvm_host_data *host_data = per_cpu_ptr(&kvm_host_data, i);
-
- host_data->fpsimd_state = &host_data->host_ctxt.fp_regs;
- }
-}
-
/*
* Return the hyp vm structure corresponding to the handle.
*/
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 5c8cd806efb9..84f766ab1810 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -324,7 +324,6 @@ void __noreturn __pkvm_init_finalise(void)
goto out;
pkvm_hyp_vm_table_init(vm_table_base);
- pkvm_host_fpsimd_state_init();
out:
/*
* We tail-called to here from handle___pkvm_init() and will not return,
--
2.45.0.215.g3402c0e53f-goog
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH v2 7/7] KVM: arm64: Consolidate initializing the host data's fpsimd_state/sve in pKVM
2024-05-21 16:37 ` [PATCH v2 7/7] KVM: arm64: Consolidate initializing the host data's fpsimd_state/sve " Fuad Tabba
@ 2024-05-21 22:55 ` Marc Zyngier
2024-05-22 14:49 ` Fuad Tabba
0 siblings, 1 reply; 21+ messages in thread
From: Marc Zyngier @ 2024-05-21 22:55 UTC (permalink / raw)
To: Fuad Tabba
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
On Tue, 21 May 2024 17:37:20 +0100,
Fuad Tabba <tabba@google.com> wrote:
>
> Now that we have introduced finalize_init_hyp_mode(), let's
> consolidate the initialization of the host_data fpsimd_state and
> sve state.
>
> Signed-off-by: Fuad Tabba <tabba@google.com>
> ---
> arch/arm64/include/asm/kvm_host.h | 10 ++++++++--
> arch/arm64/kvm/arm.c | 18 ++++++++++++------
> arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 1 -
> arch/arm64/kvm/hyp/nvhe/pkvm.c | 11 -----------
> arch/arm64/kvm/hyp/nvhe/setup.c | 1 -
> 5 files changed, 20 insertions(+), 21 deletions(-)
>
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index 7b3745ef1d73..8a170f314498 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -536,8 +536,14 @@ struct kvm_cpu_context {
> struct kvm_host_data {
> struct kvm_cpu_context host_ctxt;
>
> - struct user_fpsimd_state *fpsimd_state; /* hyp VA */
> - struct user_sve_state *sve_state; /* hyp VA */
> + /*
> + * All pointers in this union are hyp VA.
> + * sve_state is only used in pKVM and if system_supports_sve().
> + */
> + union {
> + struct user_fpsimd_state *fpsimd_state;
> + struct user_sve_state *sve_state;
> + };
>
> /* Ownership of the FP regs */
> enum {
> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> index a9b1b0e9c319..a1c7e0ad6951 100644
> --- a/arch/arm64/kvm/arm.c
> +++ b/arch/arm64/kvm/arm.c
> @@ -2445,14 +2445,20 @@ static void finalize_init_hyp_mode(void)
> {
> int cpu;
>
> - if (!is_protected_kvm_enabled() || !system_supports_sve())
> - return;
> -
> for_each_possible_cpu(cpu) {
> - struct user_sve_state *sve_state;
> + if (system_supports_sve() && is_protected_kvm_enabled()) {
> + struct user_sve_state *sve_state;
>
> - sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
> - per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = kern_hyp_va(sve_state);
> + sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
> + per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
> + kern_hyp_va(sve_state);
> + } else {
> + struct user_fpsimd_state *fpsimd_state;
> +
> + fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
> + per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
> + kern_hyp_va(fpsimd_state);
> + }
nit: SVE support and protected state do not change on a per-CPU basis,
so checking for these inside the loop is pretty counter-intuitive.
Thanks,
M.
--
Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH v2 7/7] KVM: arm64: Consolidate initializing the host data's fpsimd_state/sve in pKVM
2024-05-21 22:55 ` Marc Zyngier
@ 2024-05-22 14:49 ` Fuad Tabba
0 siblings, 0 replies; 21+ messages in thread
From: Fuad Tabba @ 2024-05-22 14:49 UTC (permalink / raw)
To: Marc Zyngier
Cc: kvmarm, linux-arm-kernel, will, qperret, seanjc, alexandru.elisei,
catalin.marinas, philmd, james.morse, suzuki.poulose,
oliver.upton, mark.rutland, broonie, joey.gouly, rananta,
yuzenghui
Hi Marc,
On Tue, May 21, 2024 at 11:56 PM Marc Zyngier <maz@kernel.org> wrote:
>
> On Tue, 21 May 2024 17:37:20 +0100,
> Fuad Tabba <tabba@google.com> wrote:
> >
> > Now that we have introduced finalize_init_hyp_mode(), let's
> > consolidate the initialization of the host_data fpsimd_state and
> > sve state.
> >
> > Signed-off-by: Fuad Tabba <tabba@google.com>
> > ---
> > arch/arm64/include/asm/kvm_host.h | 10 ++++++++--
> > arch/arm64/kvm/arm.c | 18 ++++++++++++------
> > arch/arm64/kvm/hyp/include/nvhe/pkvm.h | 1 -
> > arch/arm64/kvm/hyp/nvhe/pkvm.c | 11 -----------
> > arch/arm64/kvm/hyp/nvhe/setup.c | 1 -
> > 5 files changed, 20 insertions(+), 21 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> > index 7b3745ef1d73..8a170f314498 100644
> > --- a/arch/arm64/include/asm/kvm_host.h
> > +++ b/arch/arm64/include/asm/kvm_host.h
> > @@ -536,8 +536,14 @@ struct kvm_cpu_context {
> > struct kvm_host_data {
> > struct kvm_cpu_context host_ctxt;
> >
> > - struct user_fpsimd_state *fpsimd_state; /* hyp VA */
> > - struct user_sve_state *sve_state; /* hyp VA */
> > + /*
> > + * All pointers in this union are hyp VA.
> > + * sve_state is only used in pKVM and if system_supports_sve().
> > + */
> > + union {
> > + struct user_fpsimd_state *fpsimd_state;
> > + struct user_sve_state *sve_state;
> > + };
> >
> > /* Ownership of the FP regs */
> > enum {
> > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
> > index a9b1b0e9c319..a1c7e0ad6951 100644
> > --- a/arch/arm64/kvm/arm.c
> > +++ b/arch/arm64/kvm/arm.c
> > @@ -2445,14 +2445,20 @@ static void finalize_init_hyp_mode(void)
> > {
> > int cpu;
> >
> > - if (!is_protected_kvm_enabled() || !system_supports_sve())
> > - return;
> > -
> > for_each_possible_cpu(cpu) {
> > - struct user_sve_state *sve_state;
> > + if (system_supports_sve() && is_protected_kvm_enabled()) {
> > + struct user_sve_state *sve_state;
> >
> > - sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
> > - per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state = kern_hyp_va(sve_state);
> > + sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
> > + per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
> > + kern_hyp_va(sve_state);
> > + } else {
> > + struct user_fpsimd_state *fpsimd_state;
> > +
> > + fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
> > + per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
> > + kern_hyp_va(fpsimd_state);
> > + }
>
> nit: SVE support and protected state do not change on a per-CPU basis,
> so checking for these inside the loop is pretty counter-intuitive.
I'll fix this.
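Probably by hoisting the check out of the loop, something like
(sketch only, same code just restructured):

	if (system_supports_sve() && is_protected_kvm_enabled()) {
		for_each_possible_cpu(cpu) {
			struct user_sve_state *sve_state;

			sve_state = per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state;
			per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->sve_state =
				kern_hyp_va(sve_state);
		}
	} else {
		for_each_possible_cpu(cpu) {
			struct user_fpsimd_state *fpsimd_state;

			fpsimd_state = &per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->host_ctxt.fp_regs;
			per_cpu_ptr_nvhe_sym(kvm_host_data, cpu)->fpsimd_state =
				kern_hyp_va(fpsimd_state);
		}
	}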
Thanks for all the reviews!
/fuad
> Thanks,
>
> M.
>
> --
> Without deviation from the norm, progress is not possible.
^ permalink raw reply [flat|nested] 21+ messages in thread