All of lore.kernel.org
 help / color / mirror / Atom feed
From: Marc Zyngier <maz@kernel.org>
To: Mark Brown <broonie@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>,
	Zhang Lei <zhang.lei@jp.fujitsu.com>,
	Andre Przywara <andre.przywara@arm.com>,
	Will Deacon <will@kernel.org>,
	kvmarm@lists.cs.columbia.edu,
	linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH v3 2/7] arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE
Date: Tue, 20 Sep 2022 18:14:13 +0100	[thread overview]
Message-ID: <87y1uej7dm.wl-maz@kernel.org> (raw)
In-Reply-To: <20220815225529.930315-3-broonie@kernel.org>

On Mon, 15 Aug 2022 23:55:24 +0100,
Mark Brown <broonie@kernel.org> wrote:
> 
> When we save the state for the floating point registers this can be done
> in the form visible through either the FPSIMD V registers or the SVE Z and
> P registers. At present we track which format is currently used based on
> TIF_SVE and the SME streaming mode state but particularly in the SVE case
> this limits our options for optimising things, especially around syscalls.
> Introduce a new enum in thread_struct which explicitly states which format
> is active and keep it up to date when we change it.
> 
> At present we do not use this state except to verify that it has the
> expected value when loading the state, future patches will introduce
> functional changes.
> 
> Signed-off-by: Mark Brown <broonie@kernel.org>
> ---
>  arch/arm64/include/asm/fpsimd.h    |  2 +-
>  arch/arm64/include/asm/kvm_host.h  |  1 +
>  arch/arm64/include/asm/processor.h |  6 ++++
>  arch/arm64/kernel/fpsimd.c         | 58 ++++++++++++++++++++++--------
>  arch/arm64/kernel/process.c        |  2 ++
>  arch/arm64/kernel/ptrace.c         |  3 ++
>  arch/arm64/kernel/signal.c         |  7 +++-
>  arch/arm64/kvm/fpsimd.c            |  3 +-
>  8 files changed, 64 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index c07e4abaca3d..b74103a79052 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -61,7 +61,7 @@ extern void fpsimd_kvm_prepare(void);
>  extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
>  				     void *sve_state, unsigned int sve_vl,
>  				     void *za_state, unsigned int sme_vl,
> -				     u64 *svcr);
> +				     u64 *svcr, enum fp_state *type);
>  
>  extern void fpsimd_flush_task_state(struct task_struct *target);
>  extern void fpsimd_save_and_flush_cpu_state(void);
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index f38ef299f13b..ebd37f97aeb4 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -310,6 +310,7 @@ struct kvm_vcpu_arch {
>  	void *sve_state;
>  	unsigned int sve_max_vl;
>  	u64 svcr;
> +	enum fp_state fp_type;

Is it a state or a type? Some consistency would help. Also, what does
this represent? Your commit message keeps talking about the FP/SVE
state for the host, but this is obviously a guest-related structure.
How do the two relate?

Finally, before this patch, pahole shows this:

struct kvm_vcpu_arch {
	struct kvm_cpu_context     ctxt;                 /*     0  1824 */
	/* --- cacheline 28 boundary (1792 bytes) was 32 bytes ago --- */
	void *                     sve_state;            /*  1824     8 */
	unsigned int               sve_max_vl;           /*  1832     4 */

	/* XXX 4 bytes hole, try to pack */

	u64                        svcr;                 /*  1840     8 */
	struct kvm_s2_mmu *        hw_mmu;               /*  1848     8 */
	[...]
};

After it, we gain an extra hole:

struct kvm_vcpu_arch {
	struct kvm_cpu_context     ctxt;                 /*     0  1824 */
	/* --- cacheline 28 boundary (1792 bytes) was 32 bytes ago --- */
	void *                     sve_state;            /*  1824     8 */
	unsigned int               sve_max_vl;           /*  1832     4 */

	/* XXX 4 bytes hole, try to pack */

	u64                        svcr;                 /*  1840     8 */
	enum fp_state              fp_type;              /*  1848     4 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 29 boundary (1856 bytes) --- */
	struct kvm_s2_mmu *        hw_mmu;               /*  1856     8 */
	[...]
};

Packing things wouldn't hurt.

>  
>  	/* Stage 2 paging state used by the hardware on next switch */
>  	struct kvm_s2_mmu *hw_mmu;
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 86eb0bfe3b38..4818a6b77f39 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -122,6 +122,11 @@ enum vec_type {
>  	ARM64_VEC_MAX,
>  };
>  
> +enum fp_state {
> +	FP_STATE_FPSIMD,
> +	FP_STATE_SVE,
> +};
> +
>  struct cpu_context {
>  	unsigned long x19;
>  	unsigned long x20;
> @@ -152,6 +157,7 @@ struct thread_struct {
>  		struct user_fpsimd_state fpsimd_state;
>  	} uw;
>  
> +	enum fp_state		fp_type;	/* registers FPSIMD or SVE? */

Same comment about the state vs type.

>  	unsigned int		fpsimd_cpu;
>  	void			*sve_state;	/* SVE registers, if any */
>  	void			*za_state;	/* ZA register, if any */
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 549e11645e0f..6544ae00230f 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -125,6 +125,7 @@ struct fpsimd_last_state_struct {
>  	u64 *svcr;
>  	unsigned int sve_vl;
>  	unsigned int sme_vl;
> +	enum fp_state *fp_type;

Same thing. Grouping the pointer together would probably help
readability as well.

>  };
>  
>  static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
> @@ -330,15 +331,6 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
>   *    The task can execute SVE instructions while in userspace without
>   *    trapping to the kernel.
>   *
> - *    When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
> - *    corresponding Zn), P0-P15 and FFR are encoded in
> - *    task->thread.sve_state, formatted appropriately for vector
> - *    length task->thread.sve_vl or, if SVCR.SM is set,
> - *    task->thread.sme_vl.
> - *
> - *    task->thread.sve_state must point to a valid buffer at least
> - *    sve_state_size(task) bytes in size.
> - *
>   *    During any syscall, the kernel may optionally clear TIF_SVE and
>   *    discard the vector state except for the FPSIMD subset.
>   *
> @@ -348,7 +340,15 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
>   *    do_sve_acc() to be called, which does some preparation and then
>   *    sets TIF_SVE.
>   *
> - *    When stored, FPSIMD registers V0-V31 are encoded in
> + * During any syscall, the kernel may optionally clear TIF_SVE and
> + * discard the vector state except for the FPSIMD subset.
> + *
> + * The data will be stored in one of two formats:
> + *
> + *  * FPSIMD only - FP_STATE_FPSIMD:
> + *
> + *    When the FPSIMD only state stored task->thread.fp_type is set to
> + *    FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
>   *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
>   *    logically zero but not stored anywhere; P0-P15 and FFR are not
>   *    stored and have unspecified values from userspace's point of
> @@ -358,6 +358,19 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
>   *    task->thread.sve_state does not need to be non-NULL, valid or any
>   *    particular size: it must not be dereferenced.
>   *
> + *  * SVE state - FP_STATE_SVE:
> + *
> + *    When the full SVE state is stored task->thread.fp_type is set to
> + *    FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
> + *    corresponding Zn), P0-P15 and FFR are encoded in in
> + *    task->thread.sve_state, formatted appropriately for vector
> + *    length task->thread.sve_vl or, if SVCR.SM is set,
> + *    task->thread.sme_vl. The storage for the vector registers in
> + *    task->thread.uw.fpsimd_state should be ignored.
> + *
> + *    task->thread.sve_state must point to a valid buffer at least
> + *    sve_state_size(task) bytes in size.
> + *
>   *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
>   *    irrespective of whether TIF_SVE is clear or set, since these are
>   *    not vector length dependent.
> @@ -404,12 +417,15 @@ static void task_fpsimd_load(void)
>  		}
>  	}
>  
> -	if (restore_sve_regs)
> +	if (restore_sve_regs) {
> +		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
>  		sve_load_state(sve_pffr(&current->thread),
>  			       &current->thread.uw.fpsimd_state.fpsr,
>  			       restore_ffr);
> -	else
> +	} else {
> +		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
>  		fpsimd_load_state(&current->thread.uw.fpsimd_state);
> +	}
>  }
>  
>  /*
> @@ -474,8 +490,10 @@ static void fpsimd_save(void)
>  		sve_save_state((char *)last->sve_state +
>  					sve_ffr_offset(vl),
>  			       &last->st->fpsr, save_ffr);
> +		*last->fp_type = FP_STATE_SVE;
>  	} else {
>  		fpsimd_save_state(last->st);
> +		*last->fp_type = FP_STATE_FPSIMD;
>  	}
>  }
>  
> @@ -848,8 +866,10 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
>  
>  	fpsimd_flush_task_state(task);
>  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
> -	    thread_sm_enabled(&task->thread))
> +	    thread_sm_enabled(&task->thread)) {
>  		sve_to_fpsimd(task);
> +		task->thread.fp_type = FP_STATE_FPSIMD;
> +	}
>  
>  	if (system_supports_sme() && type == ARM64_VEC_SME) {
>  		task->thread.svcr &= ~(SVCR_SM_MASK |
> @@ -1368,6 +1388,7 @@ static void sve_init_regs(void)
>  		fpsimd_bind_task_to_cpu();
>  	} else {
>  		fpsimd_to_sve(current);
> +		current->thread.fp_type = FP_STATE_SVE;
>  	}
>  }
>  
> @@ -1596,6 +1617,8 @@ void fpsimd_flush_thread(void)
>  		current->thread.svcr = 0;
>  	}
>  
> +	current->thread.fp_type = FP_STATE_FPSIMD;
> +
>  	put_cpu_fpsimd_context();
>  	kfree(sve_state);
>  	kfree(za_state);
> @@ -1644,8 +1667,10 @@ void fpsimd_kvm_prepare(void)
>  	 */
>  	get_cpu_fpsimd_context();
>  
> -	if (test_and_clear_thread_flag(TIF_SVE))
> +	if (test_and_clear_thread_flag(TIF_SVE)) {
>  		sve_to_fpsimd(current);
> +		current->thread.fp_type = FP_STATE_FPSIMD;
> +	}
>  
>  	put_cpu_fpsimd_context();
>  }
> @@ -1667,6 +1692,7 @@ static void fpsimd_bind_task_to_cpu(void)
>  	last->sve_vl = task_get_sve_vl(current);
>  	last->sme_vl = task_get_sme_vl(current);
>  	last->svcr = &current->thread.svcr;
> +	last->fp_type = &current->thread.fp_type;
>  	current->thread.fpsimd_cpu = smp_processor_id();
>  
>  	/*
> @@ -1690,7 +1716,8 @@ static void fpsimd_bind_task_to_cpu(void)
>  
>  void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
>  			      unsigned int sve_vl, void *za_state,
> -			      unsigned int sme_vl, u64 *svcr)
> +			      unsigned int sme_vl, u64 *svcr,
> +			      enum fp_state *type)
>  {
>  	struct fpsimd_last_state_struct *last =
>  		this_cpu_ptr(&fpsimd_last_state);
> @@ -1704,6 +1731,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
>  	last->za_state = za_state;
>  	last->sve_vl = sve_vl;
>  	last->sme_vl = sme_vl;
> +	last->fp_type = type;
>  }
>  
>  /*
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 92bcc1768f0b..944d782d581b 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -335,6 +335,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
>  		clear_tsk_thread_flag(dst, TIF_SME);
>  	}
>  
> +	dst->thread.fp_type = FP_STATE_FPSIMD;
> +
>  	/* clear any pending asynchronous tag fault raised by the parent */
>  	clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
>  
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index eb7c08dfb834..fb6189bc45c9 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -894,6 +894,7 @@ static int sve_set_common(struct task_struct *target,
>  		clear_tsk_thread_flag(target, TIF_SVE);
>  		if (type == ARM64_VEC_SME)
>  			fpsimd_force_sync_to_sve(target);
> +		target->thread.fp_type = FP_STATE_FPSIMD;
>  		goto out;
>  	}
>  
> @@ -916,6 +917,7 @@ static int sve_set_common(struct task_struct *target,
>  	if (!target->thread.sve_state) {
>  		ret = -ENOMEM;
>  		clear_tsk_thread_flag(target, TIF_SVE);
> +		target->thread.fp_type = FP_STATE_FPSIMD;
>  		goto out;
>  	}
>  
> @@ -927,6 +929,7 @@ static int sve_set_common(struct task_struct *target,
>  	 */
>  	fpsimd_sync_to_sve(target);
>  	set_tsk_thread_flag(target, TIF_SVE);
> +	target->thread.fp_type = FP_STATE_SVE;
>  
>  	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
>  	start = SVE_PT_SVE_OFFSET;
> diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> index f00e8b33170a..804cc00befc3 100644
> --- a/arch/arm64/kernel/signal.c
> +++ b/arch/arm64/kernel/signal.c
> @@ -207,6 +207,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
>  	__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
>  
>  	clear_thread_flag(TIF_SVE);
> +	current->thread.fp_type = FP_STATE_FPSIMD;
>  
>  	/* load the hardware registers from the fpsimd_state structure */
>  	if (!err)
> @@ -292,6 +293,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
>  	if (sve.head.size <= sizeof(*user->sve)) {
>  		clear_thread_flag(TIF_SVE);
>  		current->thread.svcr &= ~SVCR_SM_MASK;
> +		current->thread.fp_type = FP_STATE_FPSIMD;
>  		goto fpsimd_only;
>  	}
>  
> @@ -327,6 +329,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
>  		current->thread.svcr |= SVCR_SM_MASK;
>  	else
>  		set_thread_flag(TIF_SVE);
> +	current->thread.fp_type = FP_STATE_SVE;
>  
>  fpsimd_only:
>  	/* copy the FP and status/control registers */
> @@ -932,9 +935,11 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
>  		 * FPSIMD register state - flush the saved FPSIMD
>  		 * register state in case it gets loaded.
>  		 */
> -		if (current->thread.svcr & SVCR_SM_MASK)
> +		if (current->thread.svcr & SVCR_SM_MASK) {
>  			memset(&current->thread.uw.fpsimd_state, 0,
>  			       sizeof(current->thread.uw.fpsimd_state));
> +			current->thread.fp_type = FP_STATE_FPSIMD;
> +		}
>  
>  		current->thread.svcr &= ~(SVCR_ZA_MASK |
>  					  SVCR_SM_MASK);
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 1c1b309ef420..a92977759f8d 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -140,7 +140,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
>  		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
>  					 vcpu->arch.sve_state,
>  					 vcpu->arch.sve_max_vl,
> -					 NULL, 0, &vcpu->arch.svcr);
> +					 NULL, 0, &vcpu->arch.svcr,
> +					 &vcpu->arch.fp_type);
>  
>  		clear_thread_flag(TIF_FOREIGN_FPSTATE);
>  		update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.
_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

WARNING: multiple messages have this Message-ID (diff)
From: Marc Zyngier <maz@kernel.org>
To: Mark Brown <broonie@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>,
	Will Deacon <will@kernel.org>,
	Zhang Lei <zhang.lei@jp.fujitsu.com>,
	James Morse <james.morse@arm.com>,
	Alexandru Elisei <alexandru.elisei@arm.com>,
	Andre Przywara <andre.przywara@arm.com>,
	kvmarm@lists.cs.columbia.edu,
	linux-arm-kernel@lists.infradead.org
Subject: Re: [PATCH v3 2/7] arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE
Date: Tue, 20 Sep 2022 18:14:13 +0100	[thread overview]
Message-ID: <87y1uej7dm.wl-maz@kernel.org> (raw)
In-Reply-To: <20220815225529.930315-3-broonie@kernel.org>

On Mon, 15 Aug 2022 23:55:24 +0100,
Mark Brown <broonie@kernel.org> wrote:
> 
> When we save the state for the floating point registers this can be done
> in the form visible through either the FPSIMD V registers or the SVE Z and
> P registers. At present we track which format is currently used based on
> TIF_SVE and the SME streaming mode state but particularly in the SVE case
> this limits our options for optimising things, especially around syscalls.
> Introduce a new enum in thread_struct which explicitly states which format
> is active and keep it up to date when we change it.
> 
> At present we do not use this state except to verify that it has the
> expected value when loading the state, future patches will introduce
> functional changes.
> 
> Signed-off-by: Mark Brown <broonie@kernel.org>
> ---
>  arch/arm64/include/asm/fpsimd.h    |  2 +-
>  arch/arm64/include/asm/kvm_host.h  |  1 +
>  arch/arm64/include/asm/processor.h |  6 ++++
>  arch/arm64/kernel/fpsimd.c         | 58 ++++++++++++++++++++++--------
>  arch/arm64/kernel/process.c        |  2 ++
>  arch/arm64/kernel/ptrace.c         |  3 ++
>  arch/arm64/kernel/signal.c         |  7 +++-
>  arch/arm64/kvm/fpsimd.c            |  3 +-
>  8 files changed, 64 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h
> index c07e4abaca3d..b74103a79052 100644
> --- a/arch/arm64/include/asm/fpsimd.h
> +++ b/arch/arm64/include/asm/fpsimd.h
> @@ -61,7 +61,7 @@ extern void fpsimd_kvm_prepare(void);
>  extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
>  				     void *sve_state, unsigned int sve_vl,
>  				     void *za_state, unsigned int sme_vl,
> -				     u64 *svcr);
> +				     u64 *svcr, enum fp_state *type);
>  
>  extern void fpsimd_flush_task_state(struct task_struct *target);
>  extern void fpsimd_save_and_flush_cpu_state(void);
> diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
> index f38ef299f13b..ebd37f97aeb4 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -310,6 +310,7 @@ struct kvm_vcpu_arch {
>  	void *sve_state;
>  	unsigned int sve_max_vl;
>  	u64 svcr;
> +	enum fp_state fp_type;

Is it a state or a type? Some consistency would help. Also, what does
this represent? Your commit message keeps talking about the FP/SVE
state for the host, but this is obviously a guest-related structure.
How do the two relate?

Finally, before this patch, pahole shows this:

struct kvm_vcpu_arch {
	struct kvm_cpu_context     ctxt;                 /*     0  1824 */
	/* --- cacheline 28 boundary (1792 bytes) was 32 bytes ago --- */
	void *                     sve_state;            /*  1824     8 */
	unsigned int               sve_max_vl;           /*  1832     4 */

	/* XXX 4 bytes hole, try to pack */

	u64                        svcr;                 /*  1840     8 */
	struct kvm_s2_mmu *        hw_mmu;               /*  1848     8 */
	[...]
};

After it, we gain an extra hole:

struct kvm_vcpu_arch {
	struct kvm_cpu_context     ctxt;                 /*     0  1824 */
	/* --- cacheline 28 boundary (1792 bytes) was 32 bytes ago --- */
	void *                     sve_state;            /*  1824     8 */
	unsigned int               sve_max_vl;           /*  1832     4 */

	/* XXX 4 bytes hole, try to pack */

	u64                        svcr;                 /*  1840     8 */
	enum fp_state              fp_type;              /*  1848     4 */

	/* XXX 4 bytes hole, try to pack */

	/* --- cacheline 29 boundary (1856 bytes) --- */
	struct kvm_s2_mmu *        hw_mmu;               /*  1856     8 */
	[...]
};

Packing things wouldn't hurt.

>  
>  	/* Stage 2 paging state used by the hardware on next switch */
>  	struct kvm_s2_mmu *hw_mmu;
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 86eb0bfe3b38..4818a6b77f39 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -122,6 +122,11 @@ enum vec_type {
>  	ARM64_VEC_MAX,
>  };
>  
> +enum fp_state {
> +	FP_STATE_FPSIMD,
> +	FP_STATE_SVE,
> +};
> +
>  struct cpu_context {
>  	unsigned long x19;
>  	unsigned long x20;
> @@ -152,6 +157,7 @@ struct thread_struct {
>  		struct user_fpsimd_state fpsimd_state;
>  	} uw;
>  
> +	enum fp_state		fp_type;	/* registers FPSIMD or SVE? */

Same comment about the state vs type.

>  	unsigned int		fpsimd_cpu;
>  	void			*sve_state;	/* SVE registers, if any */
>  	void			*za_state;	/* ZA register, if any */
> diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
> index 549e11645e0f..6544ae00230f 100644
> --- a/arch/arm64/kernel/fpsimd.c
> +++ b/arch/arm64/kernel/fpsimd.c
> @@ -125,6 +125,7 @@ struct fpsimd_last_state_struct {
>  	u64 *svcr;
>  	unsigned int sve_vl;
>  	unsigned int sme_vl;
> +	enum fp_state *fp_type;

Same thing. Grouping the pointer together would probably help
readability as well.

>  };
>  
>  static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
> @@ -330,15 +331,6 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
>   *    The task can execute SVE instructions while in userspace without
>   *    trapping to the kernel.
>   *
> - *    When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
> - *    corresponding Zn), P0-P15 and FFR are encoded in
> - *    task->thread.sve_state, formatted appropriately for vector
> - *    length task->thread.sve_vl or, if SVCR.SM is set,
> - *    task->thread.sme_vl.
> - *
> - *    task->thread.sve_state must point to a valid buffer at least
> - *    sve_state_size(task) bytes in size.
> - *
>   *    During any syscall, the kernel may optionally clear TIF_SVE and
>   *    discard the vector state except for the FPSIMD subset.
>   *
> @@ -348,7 +340,15 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
>   *    do_sve_acc() to be called, which does some preparation and then
>   *    sets TIF_SVE.
>   *
> - *    When stored, FPSIMD registers V0-V31 are encoded in
> + * During any syscall, the kernel may optionally clear TIF_SVE and
> + * discard the vector state except for the FPSIMD subset.
> + *
> + * The data will be stored in one of two formats:
> + *
> + *  * FPSIMD only - FP_STATE_FPSIMD:
> + *
> + *    When the FPSIMD only state stored task->thread.fp_type is set to
> + *    FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
>   *    task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
>   *    logically zero but not stored anywhere; P0-P15 and FFR are not
>   *    stored and have unspecified values from userspace's point of
> @@ -358,6 +358,19 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
>   *    task->thread.sve_state does not need to be non-NULL, valid or any
>   *    particular size: it must not be dereferenced.
>   *
> + *  * SVE state - FP_STATE_SVE:
> + *
> + *    When the full SVE state is stored task->thread.fp_type is set to
> + *    FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
> + *    corresponding Zn), P0-P15 and FFR are encoded in in
> + *    task->thread.sve_state, formatted appropriately for vector
> + *    length task->thread.sve_vl or, if SVCR.SM is set,
> + *    task->thread.sme_vl. The storage for the vector registers in
> + *    task->thread.uw.fpsimd_state should be ignored.
> + *
> + *    task->thread.sve_state must point to a valid buffer at least
> + *    sve_state_size(task) bytes in size.
> + *
>   *  * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
>   *    irrespective of whether TIF_SVE is clear or set, since these are
>   *    not vector length dependent.
> @@ -404,12 +417,15 @@ static void task_fpsimd_load(void)
>  		}
>  	}
>  
> -	if (restore_sve_regs)
> +	if (restore_sve_regs) {
> +		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
>  		sve_load_state(sve_pffr(&current->thread),
>  			       &current->thread.uw.fpsimd_state.fpsr,
>  			       restore_ffr);
> -	else
> +	} else {
> +		WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
>  		fpsimd_load_state(&current->thread.uw.fpsimd_state);
> +	}
>  }
>  
>  /*
> @@ -474,8 +490,10 @@ static void fpsimd_save(void)
>  		sve_save_state((char *)last->sve_state +
>  					sve_ffr_offset(vl),
>  			       &last->st->fpsr, save_ffr);
> +		*last->fp_type = FP_STATE_SVE;
>  	} else {
>  		fpsimd_save_state(last->st);
> +		*last->fp_type = FP_STATE_FPSIMD;
>  	}
>  }
>  
> @@ -848,8 +866,10 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
>  
>  	fpsimd_flush_task_state(task);
>  	if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
> -	    thread_sm_enabled(&task->thread))
> +	    thread_sm_enabled(&task->thread)) {
>  		sve_to_fpsimd(task);
> +		task->thread.fp_type = FP_STATE_FPSIMD;
> +	}
>  
>  	if (system_supports_sme() && type == ARM64_VEC_SME) {
>  		task->thread.svcr &= ~(SVCR_SM_MASK |
> @@ -1368,6 +1388,7 @@ static void sve_init_regs(void)
>  		fpsimd_bind_task_to_cpu();
>  	} else {
>  		fpsimd_to_sve(current);
> +		current->thread.fp_type = FP_STATE_SVE;
>  	}
>  }
>  
> @@ -1596,6 +1617,8 @@ void fpsimd_flush_thread(void)
>  		current->thread.svcr = 0;
>  	}
>  
> +	current->thread.fp_type = FP_STATE_FPSIMD;
> +
>  	put_cpu_fpsimd_context();
>  	kfree(sve_state);
>  	kfree(za_state);
> @@ -1644,8 +1667,10 @@ void fpsimd_kvm_prepare(void)
>  	 */
>  	get_cpu_fpsimd_context();
>  
> -	if (test_and_clear_thread_flag(TIF_SVE))
> +	if (test_and_clear_thread_flag(TIF_SVE)) {
>  		sve_to_fpsimd(current);
> +		current->thread.fp_type = FP_STATE_FPSIMD;
> +	}
>  
>  	put_cpu_fpsimd_context();
>  }
> @@ -1667,6 +1692,7 @@ static void fpsimd_bind_task_to_cpu(void)
>  	last->sve_vl = task_get_sve_vl(current);
>  	last->sme_vl = task_get_sme_vl(current);
>  	last->svcr = &current->thread.svcr;
> +	last->fp_type = &current->thread.fp_type;
>  	current->thread.fpsimd_cpu = smp_processor_id();
>  
>  	/*
> @@ -1690,7 +1716,8 @@ static void fpsimd_bind_task_to_cpu(void)
>  
>  void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
>  			      unsigned int sve_vl, void *za_state,
> -			      unsigned int sme_vl, u64 *svcr)
> +			      unsigned int sme_vl, u64 *svcr,
> +			      enum fp_state *type)
>  {
>  	struct fpsimd_last_state_struct *last =
>  		this_cpu_ptr(&fpsimd_last_state);
> @@ -1704,6 +1731,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
>  	last->za_state = za_state;
>  	last->sve_vl = sve_vl;
>  	last->sme_vl = sme_vl;
> +	last->fp_type = type;
>  }
>  
>  /*
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 92bcc1768f0b..944d782d581b 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -335,6 +335,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
>  		clear_tsk_thread_flag(dst, TIF_SME);
>  	}
>  
> +	dst->thread.fp_type = FP_STATE_FPSIMD;
> +
>  	/* clear any pending asynchronous tag fault raised by the parent */
>  	clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
>  
> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c
> index eb7c08dfb834..fb6189bc45c9 100644
> --- a/arch/arm64/kernel/ptrace.c
> +++ b/arch/arm64/kernel/ptrace.c
> @@ -894,6 +894,7 @@ static int sve_set_common(struct task_struct *target,
>  		clear_tsk_thread_flag(target, TIF_SVE);
>  		if (type == ARM64_VEC_SME)
>  			fpsimd_force_sync_to_sve(target);
> +		target->thread.fp_type = FP_STATE_FPSIMD;
>  		goto out;
>  	}
>  
> @@ -916,6 +917,7 @@ static int sve_set_common(struct task_struct *target,
>  	if (!target->thread.sve_state) {
>  		ret = -ENOMEM;
>  		clear_tsk_thread_flag(target, TIF_SVE);
> +		target->thread.fp_type = FP_STATE_FPSIMD;
>  		goto out;
>  	}
>  
> @@ -927,6 +929,7 @@ static int sve_set_common(struct task_struct *target,
>  	 */
>  	fpsimd_sync_to_sve(target);
>  	set_tsk_thread_flag(target, TIF_SVE);
> +	target->thread.fp_type = FP_STATE_SVE;
>  
>  	BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header));
>  	start = SVE_PT_SVE_OFFSET;
> diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c
> index f00e8b33170a..804cc00befc3 100644
> --- a/arch/arm64/kernel/signal.c
> +++ b/arch/arm64/kernel/signal.c
> @@ -207,6 +207,7 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx)
>  	__get_user_error(fpsimd.fpcr, &ctx->fpcr, err);
>  
>  	clear_thread_flag(TIF_SVE);
> +	current->thread.fp_type = FP_STATE_FPSIMD;
>  
>  	/* load the hardware registers from the fpsimd_state structure */
>  	if (!err)
> @@ -292,6 +293,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
>  	if (sve.head.size <= sizeof(*user->sve)) {
>  		clear_thread_flag(TIF_SVE);
>  		current->thread.svcr &= ~SVCR_SM_MASK;
> +		current->thread.fp_type = FP_STATE_FPSIMD;
>  		goto fpsimd_only;
>  	}
>  
> @@ -327,6 +329,7 @@ static int restore_sve_fpsimd_context(struct user_ctxs *user)
>  		current->thread.svcr |= SVCR_SM_MASK;
>  	else
>  		set_thread_flag(TIF_SVE);
> +	current->thread.fp_type = FP_STATE_SVE;
>  
>  fpsimd_only:
>  	/* copy the FP and status/control registers */
> @@ -932,9 +935,11 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka,
>  		 * FPSIMD register state - flush the saved FPSIMD
>  		 * register state in case it gets loaded.
>  		 */
> -		if (current->thread.svcr & SVCR_SM_MASK)
> +		if (current->thread.svcr & SVCR_SM_MASK) {
>  			memset(&current->thread.uw.fpsimd_state, 0,
>  			       sizeof(current->thread.uw.fpsimd_state));
> +			current->thread.fp_type = FP_STATE_FPSIMD;
> +		}
>  
>  		current->thread.svcr &= ~(SVCR_ZA_MASK |
>  					  SVCR_SM_MASK);
> diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c
> index 1c1b309ef420..a92977759f8d 100644
> --- a/arch/arm64/kvm/fpsimd.c
> +++ b/arch/arm64/kvm/fpsimd.c
> @@ -140,7 +140,8 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
>  		fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
>  					 vcpu->arch.sve_state,
>  					 vcpu->arch.sve_max_vl,
> -					 NULL, 0, &vcpu->arch.svcr);
> +					 NULL, 0, &vcpu->arch.svcr,
> +					 &vcpu->arch.fp_type);
>  
>  		clear_thread_flag(TIF_FOREIGN_FPSTATE);
>  		update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

  reply	other threads:[~2022-09-20 17:14 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-08-15 22:55 [PATCH v3 0/7] arm64/sve: Clean up KVM integration and optimise syscalls Mark Brown
2022-08-15 22:55 ` Mark Brown
2022-08-15 22:55 ` [PATCH v3 1/7] KVM: arm64: Discard any SVE state when entering KVM guests Mark Brown
2022-08-15 22:55   ` Mark Brown
2022-09-20 16:44   ` Marc Zyngier
2022-09-20 16:44     ` Marc Zyngier
2022-09-20 20:21     ` Mark Brown
2022-09-20 20:21       ` Mark Brown
2022-09-21 17:31       ` Marc Zyngier
2022-09-21 17:31         ` Marc Zyngier
2022-09-22 11:44         ` Mark Brown
2022-09-22 11:44           ` Mark Brown
2022-08-15 22:55 ` [PATCH v3 2/7] arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE Mark Brown
2022-08-15 22:55   ` Mark Brown
2022-09-20 17:14   ` Marc Zyngier [this message]
2022-09-20 17:14     ` Marc Zyngier
2022-09-20 18:09     ` Mark Brown
2022-09-20 18:09       ` Mark Brown
2022-09-20 18:30       ` Marc Zyngier
2022-09-20 18:30         ` Marc Zyngier
2022-08-15 22:55 ` [PATCH v3 3/7] arm64/fpsimd: Have KVM explicitly say which FP registers to save Mark Brown
2022-08-15 22:55   ` Mark Brown
2022-09-20 17:52   ` Marc Zyngier
2022-09-20 17:52     ` Marc Zyngier
2022-09-20 18:32     ` Mark Brown
2022-09-20 18:32       ` Mark Brown
2022-09-21 17:47       ` Marc Zyngier
2022-09-21 17:47         ` Marc Zyngier
2022-09-22 12:18         ` Mark Brown
2022-09-22 12:18           ` Mark Brown
2022-08-15 22:55 ` [PATCH v3 4/7] arm64/fpsimd: Stop using TIF_SVE to manage register saving in KVM Mark Brown
2022-08-15 22:55   ` Mark Brown
2022-09-20 18:04   ` Marc Zyngier
2022-09-20 18:04     ` Marc Zyngier
2022-09-20 18:53     ` Mark Brown
2022-09-20 18:53       ` Mark Brown
2022-08-15 22:55 ` [PATCH v3 5/7] arm64/fpsimd: Load FP state based on recorded data type Mark Brown
2022-08-15 22:55   ` Mark Brown
2022-09-20 18:19   ` Marc Zyngier
2022-09-20 18:19     ` Marc Zyngier
2022-09-20 19:02     ` Mark Brown
2022-09-20 19:02       ` Mark Brown
2022-08-15 22:55 ` [PATCH v3 6/7] arm64/fpsimd: SME no longer requires SVE register state Mark Brown
2022-08-15 22:55   ` Mark Brown
2022-08-15 22:55 ` [PATCH v3 7/7] arm64/sve: Leave SVE enabled on syscall if we don't context switch Mark Brown
2022-08-15 22:55   ` Mark Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87y1uej7dm.wl-maz@kernel.org \
    --to=maz@kernel.org \
    --cc=andre.przywara@arm.com \
    --cc=broonie@kernel.org \
    --cc=catalin.marinas@arm.com \
    --cc=kvmarm@lists.cs.columbia.edu \
    --cc=linux-arm-kernel@lists.infradead.org \
    --cc=will@kernel.org \
    --cc=zhang.lei@jp.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.