* [PATCH v2 01/34] target/arm: Rename FPST_FPCR_A32 to FPST_A32
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:41 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 02/34] target/arm: Rename FPST_FPCR_A64 to FPST_A64 Richard Henderson
` (32 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate.h | 6 ++--
target/arm/tcg/translate-vfp.c | 54 +++++++++++++++++-----------------
2 files changed, 30 insertions(+), 30 deletions(-)
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 59e780df2e..6ce2471aa6 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -674,7 +674,7 @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
* Enum for argument to fpstatus_ptr().
*/
typedef enum ARMFPStatusFlavour {
- FPST_FPCR_A32,
+ FPST_A32,
FPST_FPCR_A64,
FPST_FPCR_F16_A32,
FPST_FPCR_F16_A64,
@@ -692,7 +692,7 @@ typedef enum ARMFPStatusFlavour {
* been set up to point to the requested field in the CPU state struct.
* The options are:
*
- * FPST_FPCR_A32
+ * FPST_A32
* for AArch32 non-FP16 operations controlled by the FPCR
* FPST_FPCR_A64
* for AArch64 non-FP16 operations controlled by the FPCR
@@ -717,7 +717,7 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
int offset;
switch (flavour) {
- case FPST_FPCR_A32:
+ case FPST_A32:
offset = offsetof(CPUARMState, vfp.fp_status_a32);
break;
case FPST_FPCR_A64:
diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c
index 8eebba0f27..4cc12a407b 100644
--- a/target/arm/tcg/translate-vfp.c
+++ b/target/arm/tcg/translate-vfp.c
@@ -462,7 +462,7 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
if (sz == 1) {
fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
} else {
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
}
tcg_rmode = gen_set_rmode(rounding, fpst);
@@ -529,7 +529,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
if (sz == 1) {
fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
} else {
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
}
tcg_shift = tcg_constant_i32(0);
@@ -1398,7 +1398,7 @@ static bool do_vfp_3op_sp(DisasContext *s, VFPGen3OpSPFn *fn,
f0 = tcg_temp_new_i32();
f1 = tcg_temp_new_i32();
fd = tcg_temp_new_i32();
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
vfp_load_reg32(f0, vn);
vfp_load_reg32(f1, vm);
@@ -1517,7 +1517,7 @@ static bool do_vfp_3op_dp(DisasContext *s, VFPGen3OpDPFn *fn,
f0 = tcg_temp_new_i64();
f1 = tcg_temp_new_i64();
fd = tcg_temp_new_i64();
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
vfp_load_reg64(f0, vn);
vfp_load_reg64(f1, vm);
@@ -2181,7 +2181,7 @@ static bool do_vfm_sp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
/* VFNMA, VFNMS */
gen_vfp_negs(vd, vd);
}
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_vfp_muladds(vd, vn, vm, vd, fpst);
vfp_store_reg32(vd, a->vd);
return true;
@@ -2246,7 +2246,7 @@ static bool do_vfm_dp(DisasContext *s, arg_VFMA_dp *a, bool neg_n, bool neg_d)
/* VFNMA, VFNMS */
gen_vfp_negd(vd, vd);
}
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_vfp_muladdd(vd, vn, vm, vd, fpst);
vfp_store_reg64(vd, a->vd);
return true;
@@ -2429,12 +2429,12 @@ static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
{
- gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_FPCR_A32));
+ gen_helper_vfp_sqrts(vd, vm, fpstatus_ptr(FPST_A32));
}
static void gen_VSQRT_dp(TCGv_i64 vd, TCGv_i64 vm)
{
- gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_FPCR_A32));
+ gen_helper_vfp_sqrtd(vd, vm, fpstatus_ptr(FPST_A32));
}
DO_VFP_2OP(VSQRT, hp, gen_VSQRT_hp, aa32_fp16_arith)
@@ -2565,7 +2565,7 @@ static bool trans_VCVT_f32_f16(DisasContext *s, arg_VCVT_f32_f16 *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();
/* The T bit tells us if we want the low or high 16 bits of Vm */
@@ -2599,7 +2599,7 @@ static bool trans_VCVT_f64_f16(DisasContext *s, arg_VCVT_f64_f16 *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();
/* The T bit tells us if we want the low or high 16 bits of Vm */
@@ -2623,7 +2623,7 @@ static bool trans_VCVT_b16_f32(DisasContext *s, arg_VCVT_b16_f32 *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
tmp = tcg_temp_new_i32();
vfp_load_reg32(tmp, a->vm);
@@ -2646,7 +2646,7 @@ static bool trans_VCVT_f16_f32(DisasContext *s, arg_VCVT_f16_f32 *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();
@@ -2680,7 +2680,7 @@ static bool trans_VCVT_f16_f64(DisasContext *s, arg_VCVT_f16_f64 *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
ahp_mode = get_ahp_flag();
tmp = tcg_temp_new_i32();
vm = tcg_temp_new_i64();
@@ -2727,7 +2727,7 @@ static bool trans_VRINTR_sp(DisasContext *s, arg_VRINTR_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg32(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_rints(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
return true;
@@ -2757,7 +2757,7 @@ static bool trans_VRINTR_dp(DisasContext *s, arg_VRINTR_dp *a)
tmp = tcg_temp_new_i64();
vfp_load_reg64(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_rintd(tmp, tmp, fpst);
vfp_store_reg64(tmp, a->vd);
return true;
@@ -2803,7 +2803,7 @@ static bool trans_VRINTZ_sp(DisasContext *s, arg_VRINTZ_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg32(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
gen_helper_rints(tmp, tmp, fpst);
gen_restore_rmode(tcg_rmode, fpst);
@@ -2836,7 +2836,7 @@ static bool trans_VRINTZ_dp(DisasContext *s, arg_VRINTZ_dp *a)
tmp = tcg_temp_new_i64();
vfp_load_reg64(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
gen_helper_rintd(tmp, tmp, fpst);
gen_restore_rmode(tcg_rmode, fpst);
@@ -2880,7 +2880,7 @@ static bool trans_VRINTX_sp(DisasContext *s, arg_VRINTX_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg32(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_rints_exact(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
return true;
@@ -2910,7 +2910,7 @@ static bool trans_VRINTX_dp(DisasContext *s, arg_VRINTX_dp *a)
tmp = tcg_temp_new_i64();
vfp_load_reg64(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
gen_helper_rintd_exact(tmp, tmp, fpst);
vfp_store_reg64(tmp, a->vd);
return true;
@@ -2937,7 +2937,7 @@ static bool trans_VCVT_sp(DisasContext *s, arg_VCVT_sp *a)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i64();
vfp_load_reg32(vm, a->vm);
- gen_helper_vfp_fcvtds(vd, vm, fpstatus_ptr(FPST_FPCR_A32));
+ gen_helper_vfp_fcvtds(vd, vm, fpstatus_ptr(FPST_A32));
vfp_store_reg64(vd, a->vd);
return true;
}
@@ -2963,7 +2963,7 @@ static bool trans_VCVT_dp(DisasContext *s, arg_VCVT_dp *a)
vd = tcg_temp_new_i32();
vm = tcg_temp_new_i64();
vfp_load_reg64(vm, a->vm);
- gen_helper_vfp_fcvtsd(vd, vm, fpstatus_ptr(FPST_FPCR_A32));
+ gen_helper_vfp_fcvtsd(vd, vm, fpstatus_ptr(FPST_A32));
vfp_store_reg32(vd, a->vd);
return true;
}
@@ -3010,7 +3010,7 @@ static bool trans_VCVT_int_sp(DisasContext *s, arg_VCVT_int_sp *a)
vm = tcg_temp_new_i32();
vfp_load_reg32(vm, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
if (a->s) {
/* i32 -> f32 */
gen_helper_vfp_sitos(vm, vm, fpst);
@@ -3044,7 +3044,7 @@ static bool trans_VCVT_int_dp(DisasContext *s, arg_VCVT_int_dp *a)
vm = tcg_temp_new_i32();
vd = tcg_temp_new_i64();
vfp_load_reg32(vm, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
if (a->s) {
/* i32 -> f64 */
gen_helper_vfp_sitod(vd, vm, fpst);
@@ -3161,7 +3161,7 @@ static bool trans_VCVT_fix_sp(DisasContext *s, arg_VCVT_fix_sp *a)
vd = tcg_temp_new_i32();
vfp_load_reg32(vd, a->vd);
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
shift = tcg_constant_i32(frac_bits);
/* Switch on op:U:sx bits */
@@ -3223,7 +3223,7 @@ static bool trans_VCVT_fix_dp(DisasContext *s, arg_VCVT_fix_dp *a)
vd = tcg_temp_new_i64();
vfp_load_reg64(vd, a->vd);
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
shift = tcg_constant_i32(frac_bits);
/* Switch on op:U:sx bits */
@@ -3307,7 +3307,7 @@ static bool trans_VCVT_sp_int(DisasContext *s, arg_VCVT_sp_int *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
vm = tcg_temp_new_i32();
vfp_load_reg32(vm, a->vm);
@@ -3347,7 +3347,7 @@ static bool trans_VCVT_dp_int(DisasContext *s, arg_VCVT_dp_int *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR_A32);
+ fpst = fpstatus_ptr(FPST_A32);
vm = tcg_temp_new_i64();
vd = tcg_temp_new_i32();
vfp_load_reg64(vm, a->vm);
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 02/34] target/arm: Rename FPST_FPCR_A64 to FPST_A64
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
2025-01-29 1:38 ` [PATCH v2 01/34] target/arm: Rename FPST_FPCR_A32 to FPST_A32 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:42 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 03/34] target/arm: Rename FPST_FPCR_F16_A32 to FPST_A32_F16 Richard Henderson
` (31 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate.h | 8 +--
target/arm/tcg/translate-a64.c | 78 +++++++++++++--------------
target/arm/tcg/translate-sme.c | 4 +-
target/arm/tcg/translate-sve.c | 98 +++++++++++++++++-----------------
4 files changed, 94 insertions(+), 94 deletions(-)
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 6ce2471aa6..2edb707b85 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -675,7 +675,7 @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
*/
typedef enum ARMFPStatusFlavour {
FPST_A32,
- FPST_FPCR_A64,
+ FPST_A64,
FPST_FPCR_F16_A32,
FPST_FPCR_F16_A64,
FPST_FPCR_AH,
@@ -694,7 +694,7 @@ typedef enum ARMFPStatusFlavour {
*
* FPST_A32
* for AArch32 non-FP16 operations controlled by the FPCR
- * FPST_FPCR_A64
+ * FPST_A64
* for AArch64 non-FP16 operations controlled by the FPCR
* FPST_FPCR_F16_A32
* for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
@@ -720,7 +720,7 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
case FPST_A32:
offset = offsetof(CPUARMState, vfp.fp_status_a32);
break;
- case FPST_FPCR_A64:
+ case FPST_A64:
offset = offsetof(CPUARMState, vfp.fp_status_a64);
break;
case FPST_FPCR_F16_A32:
@@ -757,7 +757,7 @@ static inline ARMFPStatusFlavour select_fpst(DisasContext *s, MemOp esz)
if (s->fpcr_ah) {
return esz == MO_16 ? FPST_FPCR_AH_F16 : FPST_FPCR_AH;
} else {
- return esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64;
+ return esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64;
}
}
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 3e2fe46464..bf17ecca80 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5247,7 +5247,7 @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
{
return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
a->esz == MO_16 ?
- FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ FPST_FPCR_F16_A64 : FPST_A64);
}
static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
@@ -5506,9 +5506,9 @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
TCGv_i64 t0 = read_fp_dreg(s, a->rn);
TCGv_i64 t1 = tcg_constant_i64(0);
if (swap) {
- f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_FPCR_A64));
+ f->gen_d(t0, t1, t0, fpstatus_ptr(FPST_A64));
} else {
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64));
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
}
write_fp_dreg(s, a->rd, t0);
}
@@ -5518,9 +5518,9 @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
TCGv_i32 t0 = read_fp_sreg(s, a->rn);
TCGv_i32 t1 = tcg_constant_i32(0);
if (swap) {
- f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_FPCR_A64));
+ f->gen_s(t0, t1, t0, fpstatus_ptr(FPST_A64));
} else {
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64));
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
}
write_fp_sreg(s, a->rd, t0);
}
@@ -5768,7 +5768,7 @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
{
return do_fp3_vector_with_fpsttype(s, a, data, fns,
a->esz == MO_16 ?
- FPST_FPCR_F16_A64 :FPST_FPCR_A64);
+ FPST_FPCR_F16_A64 :FPST_A64);
}
static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
@@ -6135,7 +6135,7 @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
if (fp_access_check(s)) {
/* Q bit selects BFMLALB vs BFMLALT. */
gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64, a->q,
+ s->fpcr_ah ? FPST_FPCR_AH : FPST_A64, a->q,
gen_helper_gvec_bfmlal);
}
return true;
@@ -6174,7 +6174,7 @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
}
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64,
+ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64,
a->rot, fn[a->esz]);
return true;
}
@@ -6543,7 +6543,7 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
TCGv_i64 t1 = tcg_temp_new_i64();
read_vec_element(s, t1, a->rm, a->idx, MO_64);
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64));
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
write_fp_dreg_merging(s, a->rd, a->rn, t0);
}
break;
@@ -6553,7 +6553,7 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
TCGv_i32 t1 = tcg_temp_new_i32();
read_vec_element_i32(s, t1, a->rm, a->idx, MO_32);
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64));
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
write_fp_sreg_merging(s, a->rd, a->rn, t0);
}
break;
@@ -6592,7 +6592,7 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
if (neg) {
gen_vfp_maybe_ah_negd(s, t1, t1);
}
- gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR_A64));
+ gen_helper_vfp_muladdd(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
write_fp_dreg_merging(s, a->rd, a->rd, t0);
}
break;
@@ -6606,7 +6606,7 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
if (neg) {
gen_vfp_maybe_ah_negs(s, t1, t1);
}
- gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_FPCR_A64));
+ gen_helper_vfp_muladds(t0, t1, t2, t0, fpstatus_ptr(FPST_A64));
write_fp_sreg_merging(s, a->rd, a->rd, t0);
}
break;
@@ -6721,7 +6721,7 @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
}
gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
- esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64,
+ esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64,
a->idx, fns[esz - 1]);
return true;
}
@@ -6755,7 +6755,7 @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
}
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
- esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64,
+ esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64,
(s->fpcr_ah << 5) | (a->idx << 1) | neg,
fns[esz - 1]);
return true;
@@ -6892,7 +6892,7 @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
if (fp_access_check(s)) {
/* Q bit selects BFMLALB vs BFMLALT. */
gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64,
+ s->fpcr_ah ? FPST_FPCR_AH : FPST_A64,
(a->idx << 1) | a->q,
gen_helper_gvec_bfmlal_idx);
}
@@ -6921,7 +6921,7 @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
}
if (fp_access_check(s)) {
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64,
+ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64,
(a->idx << 2) | a->rot, fn);
}
return true;
@@ -6941,7 +6941,7 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
read_vec_element(s, t0, a->rn, 0, MO_64);
read_vec_element(s, t1, a->rn, 1, MO_64);
- f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64));
+ f->gen_d(t0, t0, t1, fpstatus_ptr(FPST_A64));
write_fp_dreg(s, a->rd, t0);
}
break;
@@ -6952,7 +6952,7 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
read_vec_element_i32(s, t0, a->rn, 0, MO_32);
read_vec_element_i32(s, t1, a->rn, 1, MO_32);
- f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_FPCR_A64));
+ f->gen_s(t0, t0, t1, fpstatus_ptr(FPST_A64));
write_fp_sreg(s, a->rd, t0);
}
break;
@@ -7109,7 +7109,7 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
if (neg_n) {
gen_vfp_maybe_ah_negd(s, tn, tn);
}
- fpst = fpstatus_ptr(FPST_FPCR_A64);
+ fpst = fpstatus_ptr(FPST_A64);
gen_helper_vfp_muladdd(ta, tn, tm, ta, fpst);
write_fp_dreg_merging(s, a->rd, a->ra, ta);
}
@@ -7127,7 +7127,7 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
if (neg_n) {
gen_vfp_maybe_ah_negs(s, tn, tn);
}
- fpst = fpstatus_ptr(FPST_FPCR_A64);
+ fpst = fpstatus_ptr(FPST_A64);
gen_helper_vfp_muladds(ta, tn, tm, ta, fpst);
write_fp_sreg_merging(s, a->rd, a->ra, ta);
}
@@ -7243,7 +7243,7 @@ static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
if (fp_access_check(s)) {
MemOp esz = a->esz;
int elts = (a->q ? 16 : 8) >> esz;
- TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
s->fpcr_ah ? fah : fnormal);
write_fp_sreg(s, a->rd, res);
@@ -7294,7 +7294,7 @@ static void handle_fp_compare(DisasContext *s, int size,
bool cmp_with_zero, bool signal_all_nans)
{
TCGv_i64 tcg_flags = tcg_temp_new_i64();
- TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
if (size == MO_64) {
TCGv_i64 tcg_vn, tcg_vm;
@@ -8829,7 +8829,7 @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
{
return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
a->esz == MO_16 ?
- FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ FPST_FPCR_F16_A64 : FPST_A64);
}
static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
@@ -8866,7 +8866,7 @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
{
- ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64;
+ ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_FPCR_AH : FPST_A64;
TCGv_i32 t32;
int check;
@@ -8944,7 +8944,7 @@ static bool trans_FCVT_s_ds(DisasContext *s, arg_rr *a)
if (fp_access_check(s)) {
TCGv_i32 tcg_rn = read_fp_sreg(s, a->rn);
TCGv_i64 tcg_rd = tcg_temp_new_i64();
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
gen_helper_vfp_fcvtds(tcg_rd, tcg_rn, fpst);
write_fp_dreg_merging(s, a->rd, a->rd, tcg_rd);
@@ -8957,7 +8957,7 @@ static bool trans_FCVT_s_hs(DisasContext *s, arg_rr *a)
if (fp_access_check(s)) {
TCGv_i32 tmp = read_fp_sreg(s, a->rn);
TCGv_i32 ahp = get_ahp_flag();
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
/* write_fp_hreg_merging is OK here because top half of result is zero */
@@ -8971,7 +8971,7 @@ static bool trans_FCVT_s_sd(DisasContext *s, arg_rr *a)
if (fp_access_check(s)) {
TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
TCGv_i32 tcg_rd = tcg_temp_new_i32();
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
gen_helper_vfp_fcvtsd(tcg_rd, tcg_rn, fpst);
write_fp_sreg_merging(s, a->rd, a->rd, tcg_rd);
@@ -8985,7 +8985,7 @@ static bool trans_FCVT_s_hd(DisasContext *s, arg_rr *a)
TCGv_i64 tcg_rn = read_fp_dreg(s, a->rn);
TCGv_i32 tcg_rd = tcg_temp_new_i32();
TCGv_i32 ahp = get_ahp_flag();
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
gen_helper_vfp_fcvt_f64_to_f16(tcg_rd, tcg_rn, fpst, ahp);
/* write_fp_hreg_merging is OK here because top half of tcg_rd is zero */
@@ -9029,7 +9029,7 @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
TCGv_i32 tcg_shift, tcg_single;
TCGv_i64 tcg_double;
- tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
tcg_shift = tcg_constant_i32(shift);
switch (esz) {
@@ -9124,7 +9124,7 @@ static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
TCGv_ptr tcg_fpstatus;
TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
- tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
tcg_shift = tcg_constant_i32(shift);
tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
@@ -9290,7 +9290,7 @@ static bool trans_FJCVTZS(DisasContext *s, arg_FJCVTZS *a)
}
if (fp_access_check(s)) {
TCGv_i64 t = read_fp_dreg(s, a->rn);
- TCGv_ptr fpstatus = fpstatus_ptr(FPST_FPCR_A64);
+ TCGv_ptr fpstatus = fpstatus_ptr(FPST_A64);
gen_helper_fjcvtzs(t, t, fpstatus);
@@ -9550,7 +9550,7 @@ static bool trans_FCVTXN_s(DisasContext *s, arg_rr_e *a)
*/
TCGv_i64 src = read_fp_dreg(s, a->rn);
TCGv_i32 dst = tcg_temp_new_i32();
- gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_FPCR_A64));
+ gen_helper_fcvtx_f64_to_f32(dst, src, fpstatus_ptr(FPST_A64));
write_fp_sreg_merging(s, a->rd, a->rd, dst);
}
return true;
@@ -9638,7 +9638,7 @@ static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i32 tcg_lo = tcg_temp_new_i32();
TCGv_i32 tcg_hi = tcg_temp_new_i32();
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
TCGv_i32 ahp = get_ahp_flag();
tcg_gen_extr_i64_i32(tcg_lo, tcg_hi, n);
@@ -9651,7 +9651,7 @@ static void gen_fcvtn_hs(TCGv_i64 d, TCGv_i64 n)
static void gen_fcvtn_sd(TCGv_i64 d, TCGv_i64 n)
{
TCGv_i32 tmp = tcg_temp_new_i32();
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
gen_helper_vfp_fcvtsd(tmp, n, fpst);
tcg_gen_extu_i32_i64(d, tmp);
@@ -9664,7 +9664,7 @@ static void gen_fcvtxn_sd(TCGv_i64 d, TCGv_i64 n)
* with von Neumann rounding (round to odd)
*/
TCGv_i32 tmp = tcg_temp_new_i32();
- gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_FPCR_A64));
+ gen_helper_fcvtx_f64_to_f32(tmp, n, fpstatus_ptr(FPST_A64));
tcg_gen_extu_i32_i64(d, tmp);
}
@@ -9683,7 +9683,7 @@ TRANS(FCVTXN_v, do_2misc_narrow_vector, a, f_scalar_fcvtxn)
static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
{
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_A64);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_A64);
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_bfcvt_pair(tmp, n, fpst);
tcg_gen_extu_i32_i64(d, tmp);
@@ -9773,7 +9773,7 @@ static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
return check == 0;
}
- fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
if (rmode >= 0) {
tcg_rmode = gen_set_rmode(rmode, fpst);
}
@@ -9848,7 +9848,7 @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
{
return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
esz == MO_16 ? FPST_FPCR_F16_A64 :
- FPST_FPCR_A64);
+ FPST_A64);
}
static bool do_gvec_op2_ah_fpst(DisasContext *s, MemOp esz, bool is_q,
@@ -10008,7 +10008,7 @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
TCGv_i32 tcg_op = tcg_temp_new_i32();
int srcelt = a->q ? 2 : 0;
- fpst = fpstatus_ptr(FPST_FPCR_A64);
+ fpst = fpstatus_ptr(FPST_A64);
for (pass = 0; pass < 2; pass++) {
tcg_res[pass] = tcg_temp_new_i64();
diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
index 29bec7dd7b..fcbb350016 100644
--- a/target/arm/tcg/translate-sme.c
+++ b/target/arm/tcg/translate-sme.c
@@ -358,9 +358,9 @@ static bool do_outprod_env(DisasContext *s, arg_op *a, MemOp esz,
TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_env, a,
MO_32, gen_helper_sme_fmopa_h)
TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
- MO_32, FPST_FPCR_A64, gen_helper_sme_fmopa_s)
+ MO_32, FPST_A64, gen_helper_sme_fmopa_s)
TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
- MO_64, FPST_FPCR_A64, gen_helper_sme_fmopa_d)
+ MO_64, FPST_A64, gen_helper_sme_fmopa_d)
TRANS_FEAT(BFMOPA, aa64_sme, do_outprod_env, a, MO_32, gen_helper_sme_bfmopa)
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 454f7ff900..3cc678154a 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -191,7 +191,7 @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
arg_rrr_esz *a, int data)
{
return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
}
static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
@@ -404,7 +404,7 @@ static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
arg_rprr_esz *a)
{
return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
}
/* Invoke a vector expander on two Zregs and an immediate. */
@@ -3534,7 +3534,7 @@ static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
};
return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
(s->fpcr_ah << 5) | (a->index << 1) | sub,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
}
TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
@@ -3550,7 +3550,7 @@ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
+ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
/*
*** SVE Floating Point Fast Reduction Group
@@ -3583,7 +3583,7 @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
- status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
fn(temp, t_zn, t_pg, status, t_desc);
@@ -3659,7 +3659,7 @@ static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
if (sve_access_check(s)) {
unsigned vsz = vec_full_reg_size(s);
TCGv_ptr status =
- fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
vec_full_reg_offset(s, a->rn),
@@ -3696,7 +3696,7 @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
ftmad_fns[a->esz], a->rd, a->rn, a->rm,
a->imm | (s->fpcr_ah << 3),
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
+ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
/*
*** SVE Floating Point Accumulating Reduction Group
@@ -3729,7 +3729,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
t_pg = tcg_temp_new_ptr();
tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm));
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
- t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
@@ -3829,7 +3829,7 @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn));
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
- status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16_A64 : FPST_A64);
desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
fn(t_zd, t_zn, t_pg, scalar, status, desc);
}
@@ -3902,7 +3902,7 @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
}
if (sve_access_check(s)) {
unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
vec_full_reg_offset(s, a->rn),
vec_full_reg_offset(s, a->rm),
@@ -3935,7 +3935,7 @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
};
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
+ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
#define DO_FMLA(NAME, name, ah_name) \
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
@@ -3949,7 +3949,7 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
+ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
@@ -3965,36 +3965,36 @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
};
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
+ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
};
TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
+ a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
/*
*** SVE Floating Point Unary Operations Predicated Group
*/
TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_sh, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvt_sh, a, 0, FPST_A64)
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR_F16_A64)
TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
gen_helper_sve_bfcvt, a, 0,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64)
+ s->fpcr_ah ? FPST_FPCR_AH : FPST_A64)
TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_dh, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvt_dh, a, 0, FPST_A64)
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR_F16_A64)
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_ds, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvt_ds, a, 0, FPST_A64)
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_sd, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvt_sd, a, 0, FPST_A64)
TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16_A64)
@@ -4010,22 +4010,22 @@ TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16_A64)
TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_ss, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64)
TRANS_FEAT(FCVTZU_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_ss, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvtzu_ss, a, 0, FPST_A64)
TRANS_FEAT(FCVTZS_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_sd, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvtzs_sd, a, 0, FPST_A64)
TRANS_FEAT(FCVTZU_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_sd, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvtzu_sd, a, 0, FPST_A64)
TRANS_FEAT(FCVTZS_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_ds, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvtzs_ds, a, 0, FPST_A64)
TRANS_FEAT(FCVTZU_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_ds, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvtzu_ds, a, 0, FPST_A64)
TRANS_FEAT(FCVTZS_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_dd, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvtzs_dd, a, 0, FPST_A64)
TRANS_FEAT(FCVTZU_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_dd, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_fcvtzu_dd, a, 0, FPST_A64)
static gen_helper_gvec_3_ptr * const frint_fns[] = {
NULL,
@@ -4034,7 +4034,7 @@ static gen_helper_gvec_3_ptr * const frint_fns[] = {
gen_helper_sve_frint_d
};
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
static gen_helper_gvec_3_ptr * const frintx_fns[] = {
NULL,
@@ -4043,7 +4043,7 @@ static gen_helper_gvec_3_ptr * const frintx_fns[] = {
gen_helper_sve_frintx_d
};
TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
@@ -4060,7 +4060,7 @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
}
vsz = vec_full_reg_size(s);
- status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64);
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
tmode = gen_set_rmode(mode, status);
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
@@ -4095,7 +4095,7 @@ static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16_A64)
@@ -4105,14 +4105,14 @@ TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16_A64)
TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_ss, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_scvt_ss, a, 0, FPST_A64)
TRANS_FEAT(SCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_ds, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_scvt_ds, a, 0, FPST_A64)
TRANS_FEAT(SCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_sd, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_scvt_sd, a, 0, FPST_A64)
TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_dd, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_scvt_dd, a, 0, FPST_A64)
TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16_A64)
@@ -4122,14 +4122,14 @@ TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16_A64)
TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_ss, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_ucvt_ss, a, 0, FPST_A64)
TRANS_FEAT(UCVTF_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_ds, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_ucvt_ds, a, 0, FPST_A64)
TRANS_FEAT(UCVTF_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_sd, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_ucvt_sd, a, 0, FPST_A64)
TRANS_FEAT(UCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_dd, a, 0, FPST_FPCR_A64)
+ gen_helper_sve_ucvt_dd, a, 0, FPST_A64)
/*
*** SVE Memory - 32-bit Gather and Unsized Contiguous Group
@@ -7011,10 +7011,10 @@ DO_ZPZZ_FP(FMINP, aa64_sve2, sve2_fminp_zpzz)
TRANS_FEAT_NONSTREAMING(FMMLA_s, aa64_sve_f32mm, gen_gvec_fpst_zzzz,
gen_helper_fmmla_s, a->rd, a->rn, a->rm, a->ra,
- 0, FPST_FPCR_A64)
+ 0, FPST_A64)
TRANS_FEAT_NONSTREAMING(FMMLA_d, aa64_sve_f64mm, gen_gvec_fpst_zzzz,
gen_helper_fmmla_d, a->rd, a->rn, a->rm, a->ra,
- 0, FPST_FPCR_A64)
+ 0, FPST_A64)
static gen_helper_gvec_4 * const sqdmlal_zzzw_fns[] = {
NULL, gen_helper_sve2_sqdmlal_zzzw_h,
@@ -7130,18 +7130,18 @@ TRANS_FEAT_NONSTREAMING(RAX1, aa64_sve2_sha3, gen_gvec_fn_arg_zzz,
gen_gvec_rax1, a)
TRANS_FEAT(FCVTNT_sh, aa64_sve2, gen_gvec_fpst_arg_zpz,
- gen_helper_sve2_fcvtnt_sh, a, 0, FPST_FPCR_A64)
+ gen_helper_sve2_fcvtnt_sh, a, 0, FPST_A64)
TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
- gen_helper_sve2_fcvtnt_ds, a, 0, FPST_FPCR_A64)
+ gen_helper_sve2_fcvtnt_ds, a, 0, FPST_A64)
TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
gen_helper_sve_bfcvtnt, a, 0,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64)
+ s->fpcr_ah ? FPST_FPCR_AH : FPST_A64)
TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
- gen_helper_sve2_fcvtlt_hs, a, 0, FPST_FPCR_A64)
+ gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64)
TRANS_FEAT(FCVTLT_sd, aa64_sve2, gen_gvec_fpst_arg_zpz,
- gen_helper_sve2_fcvtlt_sd, a, 0, FPST_FPCR_A64)
+ gen_helper_sve2_fcvtlt_sd, a, 0, FPST_A64)
TRANS_FEAT(FCVTX_ds, aa64_sve2, do_frint_mode, a,
FPROUNDING_ODD, gen_helper_sve_fcvt_ds)
@@ -7153,7 +7153,7 @@ static gen_helper_gvec_3_ptr * const flogb_fns[] = {
gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
+ a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
@@ -7198,7 +7198,7 @@ static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
a->rd, a->rn, a->rm, a->ra, sel,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64);
+ s->fpcr_ah ? FPST_FPCR_AH : FPST_A64);
}
TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
@@ -7209,7 +7209,7 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
a->rd, a->rn, a->rm, a->ra,
(a->index << 1) | sel,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_FPCR_A64);
+ s->fpcr_ah ? FPST_FPCR_AH : FPST_A64);
}
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 03/34] target/arm: Rename FPST_FPCR_F16_A32 to FPST_A32_F16
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
2025-01-29 1:38 ` [PATCH v2 01/34] target/arm: Rename FPST_FPCR_A32 to FPST_A32 Richard Henderson
2025-01-29 1:38 ` [PATCH v2 02/34] target/arm: Rename FPST_FPCR_A64 to FPST_A64 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:42 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 04/34] target/arm: Rename FPST_FPCR_F16_A64 to FPST_A64_F16 Richard Henderson
` (30 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate.h | 6 +++---
target/arm/tcg/translate-vfp.c | 24 ++++++++++++------------
2 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index 2edb707b85..adf6eb8b91 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -676,7 +676,7 @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
typedef enum ARMFPStatusFlavour {
FPST_A32,
FPST_A64,
- FPST_FPCR_F16_A32,
+ FPST_A32_F16,
FPST_FPCR_F16_A64,
FPST_FPCR_AH,
FPST_FPCR_AH_F16,
@@ -696,7 +696,7 @@ typedef enum ARMFPStatusFlavour {
* for AArch32 non-FP16 operations controlled by the FPCR
* FPST_A64
* for AArch64 non-FP16 operations controlled by the FPCR
- * FPST_FPCR_F16_A32
+ * FPST_A32_F16
* for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
* FPST_FPCR_F16_A64
* for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used
@@ -723,7 +723,7 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
case FPST_A64:
offset = offsetof(CPUARMState, vfp.fp_status_a64);
break;
- case FPST_FPCR_F16_A32:
+ case FPST_A32_F16:
offset = offsetof(CPUARMState, vfp.fp_status_f16_a32);
break;
case FPST_FPCR_F16_A64:
diff --git a/target/arm/tcg/translate-vfp.c b/target/arm/tcg/translate-vfp.c
index 4cc12a407b..8d9d1ab877 100644
--- a/target/arm/tcg/translate-vfp.c
+++ b/target/arm/tcg/translate-vfp.c
@@ -460,7 +460,7 @@ static bool trans_VRINT(DisasContext *s, arg_VRINT *a)
}
if (sz == 1) {
- fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
+ fpst = fpstatus_ptr(FPST_A32_F16);
} else {
fpst = fpstatus_ptr(FPST_A32);
}
@@ -527,7 +527,7 @@ static bool trans_VCVT(DisasContext *s, arg_VCVT *a)
}
if (sz == 1) {
- fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
+ fpst = fpstatus_ptr(FPST_A32_F16);
} else {
fpst = fpstatus_ptr(FPST_A32);
}
@@ -1433,7 +1433,7 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
/*
* Do a half-precision operation. Functionally this is
* the same as do_vfp_3op_sp(), except:
- * - it uses the FPST_FPCR_F16_A32
+ * - it uses the FPST_A32_F16 fp_status flavour
* - it doesn't need the VFP vector handling (fp16 is a
* v8 feature, and in v8 VFP vectors don't exist)
* - it does the aa32_fp16_arith feature test
@@ -1456,7 +1456,7 @@ static bool do_vfp_3op_hp(DisasContext *s, VFPGen3OpSPFn *fn,
f0 = tcg_temp_new_i32();
f1 = tcg_temp_new_i32();
fd = tcg_temp_new_i32();
- fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
+ fpst = fpstatus_ptr(FPST_A32_F16);
vfp_load_reg16(f0, vn);
vfp_load_reg16(f1, vm);
@@ -2122,7 +2122,7 @@ static bool do_vfm_hp(DisasContext *s, arg_VFMA_sp *a, bool neg_n, bool neg_d)
/* VFNMA, VFNMS */
gen_vfp_negh(vd, vd);
}
- fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
+ fpst = fpstatus_ptr(FPST_A32_F16);
gen_helper_vfp_muladdh(vd, vn, vm, vd, fpst);
vfp_store_reg32(vd, a->vd);
return true;
@@ -2424,7 +2424,7 @@ DO_VFP_2OP(VNEG, dp, gen_vfp_negd, aa32_fpdp_v2)
static void gen_VSQRT_hp(TCGv_i32 vd, TCGv_i32 vm)
{
- gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_FPCR_F16_A32));
+ gen_helper_vfp_sqrth(vd, vm, fpstatus_ptr(FPST_A32_F16));
}
static void gen_VSQRT_sp(TCGv_i32 vd, TCGv_i32 vm)
@@ -2706,7 +2706,7 @@ static bool trans_VRINTR_hp(DisasContext *s, arg_VRINTR_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg16(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
+ fpst = fpstatus_ptr(FPST_A32_F16);
gen_helper_rinth(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
return true;
@@ -2779,7 +2779,7 @@ static bool trans_VRINTZ_hp(DisasContext *s, arg_VRINTZ_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg16(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
+ fpst = fpstatus_ptr(FPST_A32_F16);
tcg_rmode = gen_set_rmode(FPROUNDING_ZERO, fpst);
gen_helper_rinth(tmp, tmp, fpst);
gen_restore_rmode(tcg_rmode, fpst);
@@ -2859,7 +2859,7 @@ static bool trans_VRINTX_hp(DisasContext *s, arg_VRINTX_sp *a)
tmp = tcg_temp_new_i32();
vfp_load_reg16(tmp, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
+ fpst = fpstatus_ptr(FPST_A32_F16);
gen_helper_rinth_exact(tmp, tmp, fpst);
vfp_store_reg32(tmp, a->vd);
return true;
@@ -2983,7 +2983,7 @@ static bool trans_VCVT_int_hp(DisasContext *s, arg_VCVT_int_sp *a)
vm = tcg_temp_new_i32();
vfp_load_reg32(vm, a->vm);
- fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
+ fpst = fpstatus_ptr(FPST_A32_F16);
if (a->s) {
/* i32 -> f16 */
gen_helper_vfp_sitoh(vm, vm, fpst);
@@ -3105,7 +3105,7 @@ static bool trans_VCVT_fix_hp(DisasContext *s, arg_VCVT_fix_sp *a)
vd = tcg_temp_new_i32();
vfp_load_reg32(vd, a->vd);
- fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
+ fpst = fpstatus_ptr(FPST_A32_F16);
shift = tcg_constant_i32(frac_bits);
/* Switch on op:U:sx bits */
@@ -3273,7 +3273,7 @@ static bool trans_VCVT_hp_int(DisasContext *s, arg_VCVT_sp_int *a)
return true;
}
- fpst = fpstatus_ptr(FPST_FPCR_F16_A32);
+ fpst = fpstatus_ptr(FPST_A32_F16);
vm = tcg_temp_new_i32();
vfp_load_reg16(vm, a->vm);
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 04/34] target/arm: Rename FPST_FPCR_F16_A64 to FPST_A64_F16
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (2 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 03/34] target/arm: Rename FPST_FPCR_F16_A32 to FPST_A32_F16 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:42 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 05/34] target/arm: Rename FPST_FPCR_AH* to FPST_AH* Richard Henderson
` (29 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate.h | 8 ++---
target/arm/tcg/translate-a64.c | 44 +++++++++++------------
target/arm/tcg/translate-sve.c | 66 +++++++++++++++++-----------------
3 files changed, 59 insertions(+), 59 deletions(-)
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index adf6eb8b91..cc753419ed 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -677,7 +677,7 @@ typedef enum ARMFPStatusFlavour {
FPST_A32,
FPST_A64,
FPST_A32_F16,
- FPST_FPCR_F16_A64,
+ FPST_A64_F16,
FPST_FPCR_AH,
FPST_FPCR_AH_F16,
FPST_STD,
@@ -698,7 +698,7 @@ typedef enum ARMFPStatusFlavour {
* for AArch64 non-FP16 operations controlled by the FPCR
* FPST_A32_F16
* for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
- * FPST_FPCR_F16_A64
+ * FPST_A64_F16
* for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used
* FPST_FPCR_AH:
* for AArch64 operations which change behaviour when AH=1 (specifically,
@@ -726,7 +726,7 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
case FPST_A32_F16:
offset = offsetof(CPUARMState, vfp.fp_status_f16_a32);
break;
- case FPST_FPCR_F16_A64:
+ case FPST_A64_F16:
offset = offsetof(CPUARMState, vfp.fp_status_f16_a64);
break;
case FPST_FPCR_AH:
@@ -757,7 +757,7 @@ static inline ARMFPStatusFlavour select_fpst(DisasContext *s, MemOp esz)
if (s->fpcr_ah) {
return esz == MO_16 ? FPST_FPCR_AH_F16 : FPST_FPCR_AH;
} else {
- return esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64;
+ return esz == MO_16 ? FPST_A64_F16 : FPST_A64;
}
}
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index bf17ecca80..35d409685c 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5247,7 +5247,7 @@ static bool do_fp3_scalar(DisasContext *s, arg_rrr_e *a, const FPScalar *f,
{
return do_fp3_scalar_with_fpsttype(s, a, f, mergereg,
a->esz == MO_16 ?
- FPST_FPCR_F16_A64 : FPST_A64);
+ FPST_A64_F16 : FPST_A64);
}
static bool do_fp3_scalar_ah_2fn(DisasContext *s, arg_rrr_e *a,
@@ -5533,9 +5533,9 @@ static bool do_fcmp0_s(DisasContext *s, arg_rr_e *a,
TCGv_i32 t0 = read_fp_hreg(s, a->rn);
TCGv_i32 t1 = tcg_constant_i32(0);
if (swap) {
- f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_FPCR_F16_A64));
+ f->gen_h(t0, t1, t0, fpstatus_ptr(FPST_A64_F16));
} else {
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16_A64));
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
}
write_fp_sreg(s, a->rd, t0);
}
@@ -5768,7 +5768,7 @@ static bool do_fp3_vector(DisasContext *s, arg_qrrr_e *a, int data,
{
return do_fp3_vector_with_fpsttype(s, a, data, fns,
a->esz == MO_16 ?
- FPST_FPCR_F16_A64 :FPST_A64);
+ FPST_A64_F16 :FPST_A64);
}
static bool do_fp3_vector_2fn(DisasContext *s, arg_qrrr_e *a, int data,
@@ -6174,7 +6174,7 @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
}
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
a->rot, fn[a->esz]);
return true;
}
@@ -6566,7 +6566,7 @@ static bool do_fp3_scalar_idx(DisasContext *s, arg_rrx_e *a, const FPScalar *f)
TCGv_i32 t1 = tcg_temp_new_i32();
read_vec_element_i32(s, t1, a->rm, a->idx, MO_16);
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16_A64));
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
write_fp_hreg_merging(s, a->rd, a->rn, t0);
}
break;
@@ -6624,7 +6624,7 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
gen_vfp_maybe_ah_negh(s, t1, t1);
}
gen_helper_advsimd_muladdh(t0, t1, t2, t0,
- fpstatus_ptr(FPST_FPCR_F16_A64));
+ fpstatus_ptr(FPST_A64_F16));
write_fp_hreg_merging(s, a->rd, a->rd, t0);
}
break;
@@ -6721,7 +6721,7 @@ static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
}
gen_gvec_op3_fpst(s, a->q, a->rd, a->rn, a->rm,
- esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64,
+ esz == MO_16 ? FPST_A64_F16 : FPST_A64,
a->idx, fns[esz - 1]);
return true;
}
@@ -6755,7 +6755,7 @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
}
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
- esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64,
+ esz == MO_16 ? FPST_A64_F16 : FPST_A64,
(s->fpcr_ah << 5) | (a->idx << 1) | neg,
fns[esz - 1]);
return true;
@@ -6921,7 +6921,7 @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
}
if (fp_access_check(s)) {
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
(a->idx << 2) | a->rot, fn);
}
return true;
@@ -6966,7 +6966,7 @@ static bool do_fp3_scalar_pair(DisasContext *s, arg_rr_e *a, const FPScalar *f)
read_vec_element_i32(s, t0, a->rn, 0, MO_16);
read_vec_element_i32(s, t1, a->rn, 1, MO_16);
- f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_FPCR_F16_A64));
+ f->gen_h(t0, t0, t1, fpstatus_ptr(FPST_A64_F16));
write_fp_sreg(s, a->rd, t0);
}
break;
@@ -7148,7 +7148,7 @@ static bool do_fmadd(DisasContext *s, arg_rrrr_e *a, bool neg_a, bool neg_n)
if (neg_n) {
gen_vfp_maybe_ah_negh(s, tn, tn);
}
- fpst = fpstatus_ptr(FPST_FPCR_F16_A64);
+ fpst = fpstatus_ptr(FPST_A64_F16);
gen_helper_advsimd_muladdh(ta, tn, tm, ta, fpst);
write_fp_hreg_merging(s, a->rd, a->ra, ta);
}
@@ -7243,7 +7243,7 @@ static bool do_fp_reduction(DisasContext *s, arg_qrr_e *a,
if (fp_access_check(s)) {
MemOp esz = a->esz;
int elts = (a->q ? 16 : 8) >> esz;
- TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ TCGv_ptr fpst = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
TCGv_i32 res = do_reduction_op(s, a->rn, esz, 0, elts, fpst,
s->fpcr_ah ? fah : fnormal);
write_fp_sreg(s, a->rd, res);
@@ -7294,7 +7294,7 @@ static void handle_fp_compare(DisasContext *s, int size,
bool cmp_with_zero, bool signal_all_nans)
{
TCGv_i64 tcg_flags = tcg_temp_new_i64();
- TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ TCGv_ptr fpst = fpstatus_ptr(size == MO_16 ? FPST_A64_F16 : FPST_A64);
if (size == MO_64) {
TCGv_i64 tcg_vn, tcg_vm;
@@ -8829,7 +8829,7 @@ static bool do_fp1_scalar(DisasContext *s, arg_rr_e *a,
{
return do_fp1_scalar_with_fpsttype(s, a, f, rmode,
a->esz == MO_16 ?
- FPST_FPCR_F16_A64 : FPST_A64);
+ FPST_A64_F16 : FPST_A64);
}
static bool do_fp1_scalar_ah(DisasContext *s, arg_rr_e *a,
@@ -8999,7 +8999,7 @@ static bool trans_FCVT_s_sh(DisasContext *s, arg_rr *a)
if (fp_access_check(s)) {
TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
TCGv_i32 tcg_rd = tcg_temp_new_i32();
- TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR_F16_A64);
+ TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
TCGv_i32 tcg_ahp = get_ahp_flag();
gen_helper_vfp_fcvt_f16_to_f32(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
@@ -9013,7 +9013,7 @@ static bool trans_FCVT_s_dh(DisasContext *s, arg_rr *a)
if (fp_access_check(s)) {
TCGv_i32 tcg_rn = read_fp_hreg(s, a->rn);
TCGv_i64 tcg_rd = tcg_temp_new_i64();
- TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_FPCR_F16_A64);
+ TCGv_ptr tcg_fpst = fpstatus_ptr(FPST_A64_F16);
TCGv_i32 tcg_ahp = get_ahp_flag();
gen_helper_vfp_fcvt_f16_to_f64(tcg_rd, tcg_rn, tcg_fpst, tcg_ahp);
@@ -9029,7 +9029,7 @@ static bool do_cvtf_scalar(DisasContext *s, MemOp esz, int rd, int shift,
TCGv_i32 tcg_shift, tcg_single;
TCGv_i64 tcg_double;
- tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
tcg_shift = tcg_constant_i32(shift);
switch (esz) {
@@ -9124,7 +9124,7 @@ static void do_fcvt_scalar(DisasContext *s, MemOp out, MemOp esz,
TCGv_ptr tcg_fpstatus;
TCGv_i32 tcg_shift, tcg_rmode, tcg_single;
- tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ tcg_fpstatus = fpstatus_ptr(esz == MO_16 ? FPST_A64_F16 : FPST_A64);
tcg_shift = tcg_constant_i32(shift);
tcg_rmode = gen_set_rmode(rmode, tcg_fpstatus);
@@ -9773,7 +9773,7 @@ static bool do_fp1_vector(DisasContext *s, arg_qrr_e *a,
return check == 0;
}
- fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
if (rmode >= 0) {
tcg_rmode = gen_set_rmode(rmode, fpst);
}
@@ -9847,7 +9847,7 @@ static bool do_gvec_op2_fpst(DisasContext *s, MemOp esz, bool is_q,
gen_helper_gvec_2_ptr * const fns[3])
{
return do_gvec_op2_fpst_with_fpsttype(s, esz, is_q, rd, rn, data, fns,
- esz == MO_16 ? FPST_FPCR_F16_A64 :
+ esz == MO_16 ? FPST_A64_F16 :
FPST_A64);
}
@@ -10024,7 +10024,7 @@ static bool trans_FCVTL_v(DisasContext *s, arg_qrr_e *a)
TCGv_i32 tcg_res[4];
TCGv_i32 ahp = get_ahp_flag();
- fpst = fpstatus_ptr(FPST_FPCR_F16_A64);
+ fpst = fpstatus_ptr(FPST_A64_F16);
for (pass = 0; pass < 4; pass++) {
tcg_res[pass] = tcg_temp_new_i32();
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 3cc678154a..3811316a2d 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -191,7 +191,7 @@ static bool gen_gvec_fpst_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
arg_rrr_esz *a, int data)
{
return gen_gvec_fpst_zzz(s, fn, a->rd, a->rn, a->rm, data,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
}
static bool gen_gvec_fpst_ah_arg_zzz(DisasContext *s, gen_helper_gvec_3_ptr *fn,
@@ -404,7 +404,7 @@ static bool gen_gvec_fpst_arg_zpzz(DisasContext *s, gen_helper_gvec_4_ptr *fn,
arg_rprr_esz *a)
{
return gen_gvec_fpst_zzzp(s, fn, a->rd, a->rn, a->rm, a->pg, 0,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
}
/* Invoke a vector expander on two Zregs and an immediate. */
@@ -3534,7 +3534,7 @@ static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
};
return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
(s->fpcr_ah << 5) | (a->index << 1) | sub,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
}
TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
@@ -3550,7 +3550,7 @@ static gen_helper_gvec_3_ptr * const fmul_idx_fns[4] = {
};
TRANS_FEAT(FMUL_zzx, aa64_sve, gen_gvec_fpst_zzz,
fmul_idx_fns[a->esz], a->rd, a->rn, a->rm, a->index,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
/*
*** SVE Floating Point Fast Reduction Group
@@ -3583,7 +3583,7 @@ static bool do_reduce(DisasContext *s, arg_rpr_esz *a,
tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, a->rn));
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
- status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
fn(temp, t_zn, t_pg, status, t_desc);
@@ -3659,7 +3659,7 @@ static bool do_ppz_fp(DisasContext *s, arg_rpr_esz *a,
if (sve_access_check(s)) {
unsigned vsz = vec_full_reg_size(s);
TCGv_ptr status =
- fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
vec_full_reg_offset(s, a->rn),
@@ -3696,7 +3696,7 @@ static gen_helper_gvec_3_ptr * const ftmad_fns[4] = {
TRANS_FEAT_NONSTREAMING(FTMAD, aa64_sve, gen_gvec_fpst_zzz,
ftmad_fns[a->esz], a->rd, a->rn, a->rm,
a->imm | (s->fpcr_ah << 3),
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
/*
*** SVE Floating Point Accumulating Reduction Group
@@ -3729,7 +3729,7 @@ static bool trans_FADDA(DisasContext *s, arg_rprr_esz *a)
t_pg = tcg_temp_new_ptr();
tcg_gen_addi_ptr(t_rm, tcg_env, vec_full_reg_offset(s, a->rm));
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, a->pg));
- t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ t_fpst = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
t_desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
fns[a->esz - 1](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
@@ -3829,7 +3829,7 @@ static void do_fp_scalar(DisasContext *s, int zd, int zn, int pg, bool is_fp16,
tcg_gen_addi_ptr(t_zn, tcg_env, vec_full_reg_offset(s, zn));
tcg_gen_addi_ptr(t_pg, tcg_env, pred_full_reg_offset(s, pg));
- status = fpstatus_ptr(is_fp16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ status = fpstatus_ptr(is_fp16 ? FPST_A64_F16 : FPST_A64);
desc = tcg_constant_i32(simd_desc(vsz, vsz, 0));
fn(t_zd, t_zn, t_pg, scalar, status, desc);
}
@@ -3902,7 +3902,7 @@ static bool do_fp_cmp(DisasContext *s, arg_rprr_esz *a,
}
if (sve_access_check(s)) {
unsigned vsz = vec_full_reg_size(s);
- TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ TCGv_ptr status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
vec_full_reg_offset(s, a->rn),
vec_full_reg_offset(s, a->rm),
@@ -3935,7 +3935,7 @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
};
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
#define DO_FMLA(NAME, name, ah_name) \
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
@@ -3949,7 +3949,7 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
@@ -3965,14 +3965,14 @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
};
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
NULL, gen_helper_gvec_fcmlah_idx, gen_helper_gvec_fcmlas_idx, NULL
};
TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
a->rd, a->rn, a->rm, a->ra, a->index * 4 + a->rot,
- a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
/*
*** SVE Floating Point Unary Operations Predicated Group
@@ -3981,7 +3981,7 @@ TRANS_FEAT(FCMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz, fcmla_idx_fns[a->esz],
TRANS_FEAT(FCVT_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_fcvt_sh, a, 0, FPST_A64)
TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_hs, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_fcvt_hs, a, 0, FPST_A64_F16)
TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
gen_helper_sve_bfcvt, a, 0,
@@ -3990,24 +3990,24 @@ TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_fcvt_dh, a, 0, FPST_A64)
TRANS_FEAT(FCVT_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvt_hd, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_fcvt_hd, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVT_ds, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_fcvt_ds, a, 0, FPST_A64)
TRANS_FEAT(FCVT_sd, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_fcvt_sd, a, 0, FPST_A64)
TRANS_FEAT(FCVTZS_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_hh, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_fcvtzs_hh, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZU_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_hh, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_fcvtzu_hh, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZS_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_hs, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_fcvtzs_hs, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZU_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_hs, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_fcvtzu_hs, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZS_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzs_hd, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_fcvtzs_hd, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZU_hd, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_fcvtzu_hd, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_fcvtzu_hd, a, 0, FPST_A64_F16)
TRANS_FEAT(FCVTZS_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_fcvtzs_ss, a, 0, FPST_A64)
@@ -4034,7 +4034,7 @@ static gen_helper_gvec_3_ptr * const frint_fns[] = {
gen_helper_sve_frint_d
};
TRANS_FEAT(FRINTI, aa64_sve, gen_gvec_fpst_arg_zpz, frint_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
static gen_helper_gvec_3_ptr * const frintx_fns[] = {
NULL,
@@ -4043,7 +4043,7 @@ static gen_helper_gvec_3_ptr * const frintx_fns[] = {
gen_helper_sve_frintx_d
};
TRANS_FEAT(FRINTX, aa64_sve, gen_gvec_fpst_arg_zpz, frintx_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
ARMFPRounding mode, gen_helper_gvec_3_ptr *fn)
@@ -4060,7 +4060,7 @@ static bool do_frint_mode(DisasContext *s, arg_rpr_esz *a,
}
vsz = vec_full_reg_size(s);
- status = fpstatus_ptr(a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64);
+ status = fpstatus_ptr(a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
tmode = gen_set_rmode(mode, status);
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
@@ -4095,14 +4095,14 @@ static gen_helper_gvec_3_ptr * const fsqrt_fns[] = {
gen_helper_sve_fsqrt_s, gen_helper_sve_fsqrt_d,
};
TRANS_FEAT(FSQRT, aa64_sve, gen_gvec_fpst_arg_zpz, fsqrt_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
TRANS_FEAT(SCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_hh, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_scvt_hh, a, 0, FPST_A64_F16)
TRANS_FEAT(SCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_sh, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_scvt_sh, a, 0, FPST_A64_F16)
TRANS_FEAT(SCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_scvt_dh, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_scvt_dh, a, 0, FPST_A64_F16)
TRANS_FEAT(SCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_scvt_ss, a, 0, FPST_A64)
@@ -4115,11 +4115,11 @@ TRANS_FEAT(SCVTF_dd, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_scvt_dd, a, 0, FPST_A64)
TRANS_FEAT(UCVTF_hh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_hh, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_ucvt_hh, a, 0, FPST_A64_F16)
TRANS_FEAT(UCVTF_sh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_sh, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_ucvt_sh, a, 0, FPST_A64_F16)
TRANS_FEAT(UCVTF_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
- gen_helper_sve_ucvt_dh, a, 0, FPST_FPCR_F16_A64)
+ gen_helper_sve_ucvt_dh, a, 0, FPST_A64_F16)
TRANS_FEAT(UCVTF_ss, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_ucvt_ss, a, 0, FPST_A64)
@@ -7153,7 +7153,7 @@ static gen_helper_gvec_3_ptr * const flogb_fns[] = {
gen_helper_flogb_s, gen_helper_flogb_d
};
TRANS_FEAT(FLOGB, aa64_sve2, gen_gvec_fpst_arg_zpz, flogb_fns[a->esz],
- a, 0, a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_A64)
+ a, 0, a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
static bool do_FMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sub, bool sel)
{
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 05/34] target/arm: Rename FPST_FPCR_AH* to FPST_AH*
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (3 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 04/34] target/arm: Rename FPST_FPCR_F16_A64 to FPST_A64_F16 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:42 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 06/34] target/arm: Introduce CPUARMState.vfp.fp_status[] Richard Henderson
` (28 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate.h | 14 +++++++-------
target/arm/tcg/translate-a64.c | 8 ++++----
target/arm/tcg/translate-sve.c | 8 ++++----
3 files changed, 15 insertions(+), 15 deletions(-)
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index cc753419ed..d4ae39c469 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -678,8 +678,8 @@ typedef enum ARMFPStatusFlavour {
FPST_A64,
FPST_A32_F16,
FPST_A64_F16,
- FPST_FPCR_AH,
- FPST_FPCR_AH_F16,
+ FPST_AH,
+ FPST_AH_F16,
FPST_STD,
FPST_STD_F16,
} ARMFPStatusFlavour;
@@ -700,11 +700,11 @@ typedef enum ARMFPStatusFlavour {
* for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
* FPST_A64_F16
* for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used
- * FPST_FPCR_AH:
+ * FPST_AH:
* for AArch64 operations which change behaviour when AH=1 (specifically,
* bfloat16 conversions and multiplies, and the reciprocal and square root
* estimate/step insns)
- * FPST_FPCR_AH_F16:
+ * FPST_AH_F16:
* ditto, but for half-precision operations
* FPST_STD
* for A32/T32 Neon operations using the "standard FPSCR value"
@@ -729,10 +729,10 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
case FPST_A64_F16:
offset = offsetof(CPUARMState, vfp.fp_status_f16_a64);
break;
- case FPST_FPCR_AH:
+ case FPST_AH:
offset = offsetof(CPUARMState, vfp.ah_fp_status);
break;
- case FPST_FPCR_AH_F16:
+ case FPST_AH_F16:
offset = offsetof(CPUARMState, vfp.ah_fp_status_f16);
break;
case FPST_STD:
@@ -755,7 +755,7 @@ static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
static inline ARMFPStatusFlavour select_fpst(DisasContext *s, MemOp esz)
{
if (s->fpcr_ah) {
- return esz == MO_16 ? FPST_FPCR_AH_F16 : FPST_FPCR_AH;
+ return esz == MO_16 ? FPST_AH_F16 : FPST_AH;
} else {
return esz == MO_16 ? FPST_A64_F16 : FPST_A64;
}
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 35d409685c..715760a17b 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -6135,7 +6135,7 @@ static bool trans_BFMLAL_v(DisasContext *s, arg_qrrr_e *a)
if (fp_access_check(s)) {
/* Q bit selects BFMLALB vs BFMLALT. */
gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_A64, a->q,
+ s->fpcr_ah ? FPST_AH : FPST_A64, a->q,
gen_helper_gvec_bfmlal);
}
return true;
@@ -6892,7 +6892,7 @@ static bool trans_BFMLAL_vi(DisasContext *s, arg_qrrx_e *a)
if (fp_access_check(s)) {
/* Q bit selects BFMLALB vs BFMLALT. */
gen_gvec_op4_fpst(s, true, a->rd, a->rn, a->rm, a->rd,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_A64,
+ s->fpcr_ah ? FPST_AH : FPST_A64,
(a->idx << 1) | a->q,
gen_helper_gvec_bfmlal_idx);
}
@@ -8866,7 +8866,7 @@ TRANS(FRINTX_s, do_fp1_scalar, a, &f_scalar_frintx, -1)
static bool trans_BFCVT_s(DisasContext *s, arg_rr_e *a)
{
- ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_FPCR_AH : FPST_A64;
+ ARMFPStatusFlavour fpsttype = s->fpcr_ah ? FPST_AH : FPST_A64;
TCGv_i32 t32;
int check;
@@ -9691,7 +9691,7 @@ static void gen_bfcvtn_hs(TCGv_i64 d, TCGv_i64 n)
static void gen_bfcvtn_ah_hs(TCGv_i64 d, TCGv_i64 n)
{
- TCGv_ptr fpst = fpstatus_ptr(FPST_FPCR_AH);
+ TCGv_ptr fpst = fpstatus_ptr(FPST_AH);
TCGv_i32 tmp = tcg_temp_new_i32();
gen_helper_bfcvt_pair(tmp, n, fpst);
tcg_gen_extu_i32_i64(d, tmp);
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 3811316a2d..cb6bb27622 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -3985,7 +3985,7 @@ TRANS_FEAT(FCVT_hs, aa64_sve, gen_gvec_fpst_arg_zpz,
TRANS_FEAT(BFCVT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
gen_helper_sve_bfcvt, a, 0,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_A64)
+ s->fpcr_ah ? FPST_AH : FPST_A64)
TRANS_FEAT(FCVT_dh, aa64_sve, gen_gvec_fpst_arg_zpz,
gen_helper_sve_fcvt_dh, a, 0, FPST_A64)
@@ -7136,7 +7136,7 @@ TRANS_FEAT(FCVTNT_ds, aa64_sve2, gen_gvec_fpst_arg_zpz,
TRANS_FEAT(BFCVTNT, aa64_sve_bf16, gen_gvec_fpst_arg_zpz,
gen_helper_sve_bfcvtnt, a, 0,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_A64)
+ s->fpcr_ah ? FPST_AH : FPST_A64)
TRANS_FEAT(FCVTLT_hs, aa64_sve2, gen_gvec_fpst_arg_zpz,
gen_helper_sve2_fcvtlt_hs, a, 0, FPST_A64)
@@ -7198,7 +7198,7 @@ static bool do_BFMLAL_zzzw(DisasContext *s, arg_rrrr_esz *a, bool sel)
{
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal,
a->rd, a->rn, a->rm, a->ra, sel,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_A64);
+ s->fpcr_ah ? FPST_AH : FPST_A64);
}
TRANS_FEAT(BFMLALB_zzzw, aa64_sve_bf16, do_BFMLAL_zzzw, a, false)
@@ -7209,7 +7209,7 @@ static bool do_BFMLAL_zzxw(DisasContext *s, arg_rrxr_esz *a, bool sel)
return gen_gvec_fpst_zzzz(s, gen_helper_gvec_bfmlal_idx,
a->rd, a->rn, a->rm, a->ra,
(a->index << 1) | sel,
- s->fpcr_ah ? FPST_FPCR_AH : FPST_A64);
+ s->fpcr_ah ? FPST_AH : FPST_A64);
}
TRANS_FEAT(BFMLALB_zzxw, aa64_sve_bf16, do_BFMLAL_zzxw, a, false)
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 06/34] target/arm: Introduce CPUARMState.vfp.fp_status[]
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (4 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 05/34] target/arm: Rename FPST_FPCR_AH* to FPST_AH* Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:34 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 07/34] target/arm: Remove standard_fp_status_f16 Richard Henderson
` (27 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Move ARMFPStatusFlavour to cpu.h with which to index
this array. For now, place the array in an anonymous
union with the existing structures. Adjust the order
of the existing structures to match the enum.
Simplify fpstatus_ptr() using the new array.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 119 +++++++++++++++++++++----------------
target/arm/tcg/translate.h | 64 +-------------------
2 files changed, 70 insertions(+), 113 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index f562e0687c..c025649ff2 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -202,6 +202,61 @@ typedef struct ARMMMUFaultInfo ARMMMUFaultInfo;
typedef struct NVICState NVICState;
+/*
+ * Enum for indexing vfp.fp_status[].
+ *
+ * FPST_A32: is the "normal" fp status for AArch32 insns
+ * FPST_A64: is the "normal" fp status for AArch64 insns
+ * FPST_A32_F16: used for AArch32 half-precision calculations
+ * FPST_A64_F16: used for AArch64 half-precision calculations
+ * FPST_STD: the ARM "Standard FPSCR Value"
+ * FPST_STD_F16: used for half-precision
+ * calculations with the ARM "Standard FPSCR Value"
+ * FPST_AH: used for the A64 insns which change behaviour
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
+ * and the reciprocal and square root estimate/step insns)
+ * FPST_AH_F16: used for the A64 insns which change behaviour
+ * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
+ * and the reciprocal and square root estimate/step insns);
+ * for half-precision
+ *
+ * Half-precision operations are governed by a separate
+ * flush-to-zero control bit in FPSCR.FZ16. We pass a separate
+ * status structure to control this.
+ *
+ * The "Standard FPSCR" is default-NaN, flush-to-zero,
+ * round-to-nearest, and is used by any operations (generally
+ * Neon) which the architecture defines as controlled by the
+ * standard FPSCR value rather than the FPSCR.
+ *
+ * The "standard FPSCR but for fp16 ops" is needed because
+ * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
+ * using a fixed value for it.
+ *
+ * The FPST_AH status is needed because some insns have different
+ * behaviour when FPCR.AH == 1: they don't update cumulative
+ * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
+ * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
+ * which means we need an FPST_AH_F16 status as well.
+ *
+ * To avoid having to transfer exception bits around, we simply
+ * say that the FPSCR cumulative exception flags are the logical
+ * OR of the flags in the various fp statuses. This relies on the
+ * only thing which needs to read the exception flags being
+ * an explicit FPSCR read.
+ */
+typedef enum ARMFPStatusFlavour {
+ FPST_A32,
+ FPST_A64,
+ FPST_A32_F16,
+ FPST_A64_F16,
+ FPST_AH,
+ FPST_AH_F16,
+ FPST_STD,
+ FPST_STD_F16,
+} ARMFPStatusFlavour;
+#define FPST_COUNT 8
+
typedef struct CPUArchState {
/* Regs for current mode. */
uint32_t regs[16];
@@ -631,56 +686,20 @@ typedef struct CPUArchState {
/* Scratch space for aa32 neon expansion. */
uint32_t scratch[8];
- /* There are a number of distinct float control structures:
- *
- * fp_status_a32: is the "normal" fp status for AArch32 insns
- * fp_status_a64: is the "normal" fp status for AArch64 insns
- * fp_status_fp16_a32: used for AArch32 half-precision calculations
- * fp_status_fp16_a64: used for AArch64 half-precision calculations
- * standard_fp_status : the ARM "Standard FPSCR Value"
- * standard_fp_status_fp16 : used for half-precision
- * calculations with the ARM "Standard FPSCR Value"
- * ah_fp_status: used for the A64 insns which change behaviour
- * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
- * and the reciprocal and square root estimate/step insns)
- * ah_fp_status_f16: used for the A64 insns which change behaviour
- * when FPCR.AH == 1 (bfloat16 conversions and multiplies,
- * and the reciprocal and square root estimate/step insns);
- * for half-precision
- *
- * Half-precision operations are governed by a separate
- * flush-to-zero control bit in FPSCR:FZ16. We pass a separate
- * status structure to control this.
- *
- * The "Standard FPSCR", ie default-NaN, flush-to-zero,
- * round-to-nearest and is used by any operations (generally
- * Neon) which the architecture defines as controlled by the
- * standard FPSCR value rather than the FPSCR.
- *
- * The "standard FPSCR but for fp16 ops" is needed because
- * the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
- * using a fixed value for it.
- *
- * The ah_fp_status is needed because some insns have different
- * behaviour when FPCR.AH == 1: they don't update cumulative
- * exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
- * they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
- * which means we need an ah_fp_status_f16 as well.
- *
- * To avoid having to transfer exception bits around, we simply
- * say that the FPSCR cumulative exception flags are the logical
- * OR of the flags in the four fp statuses. This relies on the
- * only thing which needs to read the exception flags being
- * an explicit FPSCR read.
- */
- float_status fp_status_a32;
- float_status fp_status_a64;
- float_status fp_status_f16_a32;
- float_status fp_status_f16_a64;
- float_status standard_fp_status;
- float_status standard_fp_status_f16;
- float_status ah_fp_status;
- float_status ah_fp_status_f16;
+ /* There are a number of distinct float control structures. */
+ union {
+ float_status fp_status[FPST_COUNT];
+ struct {
+ float_status fp_status_a32;
+ float_status fp_status_a64;
+ float_status fp_status_f16_a32;
+ float_status fp_status_f16_a64;
+ float_status ah_fp_status;
+ float_status ah_fp_status_f16;
+ float_status standard_fp_status;
+ float_status standard_fp_status_f16;
+ };
+ };
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h
index d4ae39c469..6f854f1031 100644
--- a/target/arm/tcg/translate.h
+++ b/target/arm/tcg/translate.h
@@ -670,80 +670,18 @@ static inline CPUARMTBFlags arm_tbflags_from_tb(const TranslationBlock *tb)
return (CPUARMTBFlags){ tb->flags, tb->cs_base };
}
-/*
- * Enum for argument to fpstatus_ptr().
- */
-typedef enum ARMFPStatusFlavour {
- FPST_A32,
- FPST_A64,
- FPST_A32_F16,
- FPST_A64_F16,
- FPST_AH,
- FPST_AH_F16,
- FPST_STD,
- FPST_STD_F16,
-} ARMFPStatusFlavour;
-
/**
* fpstatus_ptr: return TCGv_ptr to the specified fp_status field
*
* We have multiple softfloat float_status fields in the Arm CPU state struct
* (see the comment in cpu.h for details). Return a TCGv_ptr which has
* been set up to point to the requested field in the CPU state struct.
- * The options are:
- *
- * FPST_A32
- * for AArch32 non-FP16 operations controlled by the FPCR
- * FPST_A64
- * for AArch64 non-FP16 operations controlled by the FPCR
- * FPST_A32_F16
- * for AArch32 operations controlled by the FPCR where FPCR.FZ16 is to be used
- * FPST_A64_F16
- * for AArch64 operations controlled by the FPCR where FPCR.FZ16 is to be used
- * FPST_AH:
- * for AArch64 operations which change behaviour when AH=1 (specifically,
- * bfloat16 conversions and multiplies, and the reciprocal and square root
- * estimate/step insns)
- * FPST_AH_F16:
- * ditto, but for half-precision operations
- * FPST_STD
- * for A32/T32 Neon operations using the "standard FPSCR value"
- * FPST_STD_F16
- * as FPST_STD, but where FPCR.FZ16 is to be used
*/
static inline TCGv_ptr fpstatus_ptr(ARMFPStatusFlavour flavour)
{
TCGv_ptr statusptr = tcg_temp_new_ptr();
- int offset;
+ int offset = offsetof(CPUARMState, vfp.fp_status[flavour]);
- switch (flavour) {
- case FPST_A32:
- offset = offsetof(CPUARMState, vfp.fp_status_a32);
- break;
- case FPST_A64:
- offset = offsetof(CPUARMState, vfp.fp_status_a64);
- break;
- case FPST_A32_F16:
- offset = offsetof(CPUARMState, vfp.fp_status_f16_a32);
- break;
- case FPST_A64_F16:
- offset = offsetof(CPUARMState, vfp.fp_status_f16_a64);
- break;
- case FPST_AH:
- offset = offsetof(CPUARMState, vfp.ah_fp_status);
- break;
- case FPST_AH_F16:
- offset = offsetof(CPUARMState, vfp.ah_fp_status_f16);
- break;
- case FPST_STD:
- offset = offsetof(CPUARMState, vfp.standard_fp_status);
- break;
- case FPST_STD_F16:
- offset = offsetof(CPUARMState, vfp.standard_fp_status_f16);
- break;
- default:
- g_assert_not_reached();
- }
tcg_gen_addi_ptr(statusptr, tcg_env, offset);
return statusptr;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v2 06/34] target/arm: Introduce CPUARMState.vfp.fp_status[]
2025-01-29 1:38 ` [PATCH v2 06/34] target/arm: Introduce CPUARMState.vfp.fp_status[] Richard Henderson
@ 2025-01-29 17:34 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 52+ messages in thread
From: Philippe Mathieu-Daudé @ 2025-01-29 17:34 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell
On 29/1/25 02:38, Richard Henderson wrote:
> Move ARMFPStatusFlavour to cpu.h with which to index
> this array. For now, place the array in an anonymous
> union with the existing structures. Adjust the order
> of the existing structures to match the enum.
>
> Simplify fpstatus_ptr() using the new array.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/arm/cpu.h | 119 +++++++++++++++++++++----------------
> target/arm/tcg/translate.h | 64 +-------------------
> 2 files changed, 70 insertions(+), 113 deletions(-)
Nice.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v2 07/34] target/arm: Remove standard_fp_status_f16
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (5 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 06/34] target/arm: Introduce CPUARMState.vfp.fp_status[] Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:35 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 08/34] target/arm: Remove standard_fp_status Richard Henderson
` (26 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Replace with fp_status[FPST_STD_F16].
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 1 -
target/arm/cpu.c | 4 ++--
target/arm/tcg/mve_helper.c | 24 ++++++++++++------------
target/arm/vfp_helper.c | 8 ++++----
4 files changed, 18 insertions(+), 19 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index c025649ff2..893a2cdd0a 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -697,7 +697,6 @@ typedef struct CPUArchState {
float_status ah_fp_status;
float_status ah_fp_status_f16;
float_status standard_fp_status;
- float_status standard_fp_status_f16;
};
};
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 8fa220a716..b887edf1d1 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -549,13 +549,13 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
set_flush_to_zero(1, &env->vfp.standard_fp_status);
set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
set_default_nan_mode(1, &env->vfp.standard_fp_status);
- set_default_nan_mode(1, &env->vfp.standard_fp_status_f16);
+ set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
- arm_set_default_fp_behaviours(&env->vfp.standard_fp_status_f16);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
set_flush_to_zero(1, &env->vfp.ah_fp_status);
set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
index 03ebef5ef2..911a53a23a 100644
--- a/target/arm/tcg/mve_helper.c
+++ b/target/arm/tcg/mve_helper.c
@@ -2814,7 +2814,7 @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
@@ -2888,7 +2888,7 @@ DO_2OP_FP_ALL(vminnma, minnuma)
r[e] = 0; \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
if (!(tm & 1)) { \
/* We need the result but without updating flags */ \
@@ -2926,7 +2926,7 @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
@@ -2964,7 +2964,7 @@ DO_VFMA(vfmss, 4, float32, true)
if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \
continue; \
} \
- fpst0 = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
fpst1 = fpst0; \
if (!(mask & 1)) { \
@@ -3049,7 +3049,7 @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
@@ -3084,7 +3084,7 @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
@@ -3117,7 +3117,7 @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
TYPE *m = vm; \
TYPE ra = (TYPE)ra_in; \
float_status *fpst = (ESIZE == 2) ? \
- &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
if (mask & 1) { \
@@ -3168,7 +3168,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
if ((mask & emask) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
if (!(mask & (1 << (e * ESIZE)))) { \
/* We need the result but without updating flags */ \
@@ -3202,7 +3202,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
if ((mask & emask) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
if (!(mask & (1 << (e * ESIZE)))) { \
/* We need the result but without updating flags */ \
@@ -3267,7 +3267,7 @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
@@ -3301,7 +3301,7 @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
float_status *fpst; \
float_status scratch_fpst; \
float_status *base_fpst = (ESIZE == 2) ? \
- &env->vfp.standard_fp_status_f16 : \
+ &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
set_float_rounding_mode(rmode, base_fpst); \
@@ -3427,7 +3427,7 @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.standard_fp_status_f16 : \
+ fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
&env->vfp.standard_fp_status; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index e63455c4bb..28c2b40bd8 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -122,7 +122,7 @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
/* FZ16 does not generate an input denormal exception. */
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
& ~float_flag_input_denormal_flushed);
- a32_flags |= (get_float_exception_flags(&env->vfp.standard_fp_status_f16)
+ a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
& ~float_flag_input_denormal_flushed);
a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
@@ -158,7 +158,7 @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
set_float_exception_flags(0, &env->vfp.standard_fp_status);
- set_float_exception_flags(0, &env->vfp.standard_fp_status_f16);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
set_float_exception_flags(0, &env->vfp.ah_fp_status);
set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
}
@@ -205,11 +205,11 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
bool ftz_enabled = val & FPCR_FZ16;
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
- set_flush_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.standard_fp_status_f16);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
}
if (changed & FPCR_FZ) {
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 08/34] target/arm: Remove standard_fp_status
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (6 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 07/34] target/arm: Remove standard_fp_status_f16 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:36 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 09/34] target/arm: Remove ah_fp_status_f16 Richard Henderson
` (25 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Replace with fp_status[FPST_STD].
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 1 -
target/arm/cpu.c | 8 ++++----
target/arm/tcg/mve_helper.c | 28 ++++++++++++++--------------
target/arm/tcg/vec_helper.c | 4 ++--
target/arm/vfp_helper.c | 4 ++--
5 files changed, 22 insertions(+), 23 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 893a2cdd0a..18afff8509 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -696,7 +696,6 @@ typedef struct CPUArchState {
float_status fp_status_f16_a64;
float_status ah_fp_status;
float_status ah_fp_status_f16;
- float_status standard_fp_status;
};
};
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index b887edf1d1..26e3465a4b 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -546,13 +546,13 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
env->sau.ctrl = 0;
}
- set_flush_to_zero(1, &env->vfp.standard_fp_status);
- set_flush_inputs_to_zero(1, &env->vfp.standard_fp_status);
- set_default_nan_mode(1, &env->vfp.standard_fp_status);
+ set_flush_to_zero(1, &env->vfp.fp_status[FPST_STD]);
+ set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]);
+ set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
- arm_set_default_fp_behaviours(&env->vfp.standard_fp_status);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
index 911a53a23a..3763d71e20 100644
--- a/target/arm/tcg/mve_helper.c
+++ b/target/arm/tcg/mve_helper.c
@@ -2815,7 +2815,7 @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
continue; \
} \
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -2889,7 +2889,7 @@ DO_2OP_FP_ALL(vminnma, minnuma)
continue; \
} \
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
if (!(tm & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -2927,7 +2927,7 @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
continue; \
} \
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -2965,7 +2965,7 @@ DO_VFMA(vfmss, 4, float32, true)
continue; \
} \
fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
fpst1 = fpst0; \
if (!(mask & 1)) { \
scratch_fpst = *fpst0; \
@@ -3050,7 +3050,7 @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
continue; \
} \
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3085,7 +3085,7 @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
continue; \
} \
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3118,7 +3118,7 @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
TYPE ra = (TYPE)ra_in; \
float_status *fpst = (ESIZE == 2) ? \
&env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
if (mask & 1) { \
TYPE v = m[H##ESIZE(e)]; \
@@ -3169,7 +3169,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
continue; \
} \
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
if (!(mask & (1 << (e * ESIZE)))) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3203,7 +3203,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
continue; \
} \
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
if (!(mask & (1 << (e * ESIZE)))) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3268,7 +3268,7 @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
continue; \
} \
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3302,7 +3302,7 @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
float_status scratch_fpst; \
float_status *base_fpst = (ESIZE == 2) ? \
&env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
set_float_rounding_mode(rmode, base_fpst); \
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
@@ -3347,7 +3347,7 @@ static void do_vcvt_sh(CPUARMState *env, void *vd, void *vm, int top)
unsigned e;
float_status *fpst;
float_status scratch_fpst;
- float_status *base_fpst = &env->vfp.standard_fp_status;
+ float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
bool old_fz = get_flush_to_zero(base_fpst);
set_flush_to_zero(false, base_fpst);
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
@@ -3377,7 +3377,7 @@ static void do_vcvt_hs(CPUARMState *env, void *vd, void *vm, int top)
unsigned e;
float_status *fpst;
float_status scratch_fpst;
- float_status *base_fpst = &env->vfp.standard_fp_status;
+ float_status *base_fpst = &env->vfp.fp_status[FPST_STD];
bool old_fiz = get_flush_inputs_to_zero(base_fpst);
set_flush_inputs_to_zero(false, base_fpst);
for (e = 0; e < 16 / 4; e++, mask >>= 4) {
@@ -3428,7 +3428,7 @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
continue; \
} \
fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.standard_fp_status; \
+ &env->vfp.fp_status[FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index b369c9f45b..60839ae560 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2177,7 +2177,7 @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
- do_fmlal(vd, vn, vm, &env->vfp.standard_fp_status, desc,
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc,
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
}
@@ -2239,7 +2239,7 @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
- do_fmlal_idx(vd, vn, vm, &env->vfp.standard_fp_status, desc,
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc,
get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
}
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 28c2b40bd8..93db713a40 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -118,7 +118,7 @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
uint32_t a32_flags = 0, a64_flags = 0;
a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
- a32_flags |= get_float_exception_flags(&env->vfp.standard_fp_status);
+ a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
/* FZ16 does not generate an input denormal exception. */
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
& ~float_flag_input_denormal_flushed);
@@ -157,7 +157,7 @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
set_float_exception_flags(0, &env->vfp.fp_status_a64);
set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
- set_float_exception_flags(0, &env->vfp.standard_fp_status);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
set_float_exception_flags(0, &env->vfp.ah_fp_status);
set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v2 08/34] target/arm: Remove standard_fp_status
2025-01-29 1:38 ` [PATCH v2 08/34] target/arm: Remove standard_fp_status Richard Henderson
@ 2025-01-29 17:36 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 52+ messages in thread
From: Philippe Mathieu-Daudé @ 2025-01-29 17:36 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell
On 29/1/25 02:38, Richard Henderson wrote:
> Replace with fp_status[FPST_STD].
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/arm/cpu.h | 1 -
> target/arm/cpu.c | 8 ++++----
> target/arm/tcg/mve_helper.c | 28 ++++++++++++++--------------
> target/arm/tcg/vec_helper.c | 4 ++--
> target/arm/vfp_helper.c | 4 ++--
> 5 files changed, 22 insertions(+), 23 deletions(-)
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v2 09/34] target/arm: Remove ah_fp_status_f16
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (7 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 08/34] target/arm: Remove standard_fp_status Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:36 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 10/34] target/arm: Remove ah_fp_status Richard Henderson
` (24 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Replace with fp_status[FPST_AH_F16].
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 3 +--
target/arm/cpu.c | 2 +-
target/arm/vfp_helper.c | 10 +++++-----
3 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 18afff8509..0f7d5d5430 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -237,7 +237,7 @@ typedef struct NVICState NVICState;
* behaviour when FPCR.AH == 1: they don't update cumulative
* exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
* they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
- * which means we need an ah_fp_status_f16 as well.
+ * which means we need an FPST_AH_F16 as well.
*
* To avoid having to transfer exception bits around, we simply
* say that the FPSCR cumulative exception flags are the logical
@@ -695,7 +695,6 @@ typedef struct CPUArchState {
float_status fp_status_f16_a32;
float_status fp_status_f16_a64;
float_status ah_fp_status;
- float_status ah_fp_status_f16;
};
};
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 26e3465a4b..ffb2151de5 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -559,7 +559,7 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
set_flush_to_zero(1, &env->vfp.ah_fp_status);
set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
- arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status_f16);
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]);
#ifndef CONFIG_USER_ONLY
if (kvm_enabled()) {
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 93db713a40..d8dc58098b 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -129,7 +129,7 @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
/*
- * We do not merge in flags from ah_fp_status or ah_fp_status_f16, because
+ * We do not merge in flags from ah_fp_status or FPST_AH_F16, because
* they are used for insns that must not set the cumulative exception bits.
*/
@@ -160,7 +160,7 @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
set_float_exception_flags(0, &env->vfp.ah_fp_status);
- set_float_exception_flags(0, &env->vfp.ah_fp_status_f16);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
}
static void vfp_sync_and_clear_float_status_exc_flags(CPUARMState *env)
@@ -206,11 +206,11 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
- set_flush_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.ah_fp_status_f16);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
}
if (changed & FPCR_FZ) {
bool ftz_enabled = val & FPCR_FZ;
@@ -235,7 +235,7 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status);
- set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status_f16);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
}
if (changed & FPCR_AH) {
bool ah_enabled = val & FPCR_AH;
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 10/34] target/arm: Remove ah_fp_status
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (8 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 09/34] target/arm: Remove ah_fp_status_f16 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:37 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 11/34] target/arm: Remove fp_status_f16_a64 Richard Henderson
` (23 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Replace with fp_status[FPST_AH].
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 3 +--
target/arm/cpu.c | 6 +++---
target/arm/vfp_helper.c | 6 +++---
3 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 0f7d5d5430..5e3d952588 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -233,7 +233,7 @@ typedef struct NVICState NVICState;
* the "standard FPSCR" tracks the FPSCR.FZ16 bit rather than
* using a fixed value for it.
*
- * The ah_fp_status is needed because some insns have different
+ * FPST_AH is needed because some insns have different
* behaviour when FPCR.AH == 1: they don't update cumulative
* exception flags, they act like FPCR.{FZ,FIZ} = {1,1} and
* they ignore FPCR.RMode. But they don't ignore FPCR.FZ16,
@@ -694,7 +694,6 @@ typedef struct CPUArchState {
float_status fp_status_a64;
float_status fp_status_f16_a32;
float_status fp_status_f16_a64;
- float_status ah_fp_status;
};
};
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index ffb2151de5..01a0428c6e 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -556,9 +556,9 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
- arm_set_ah_fp_behaviours(&env->vfp.ah_fp_status);
- set_flush_to_zero(1, &env->vfp.ah_fp_status);
- set_flush_inputs_to_zero(1, &env->vfp.ah_fp_status);
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
+ set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]);
+ set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_AH]);
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH_F16]);
#ifndef CONFIG_USER_ONLY
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index d8dc58098b..78be434caf 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -129,7 +129,7 @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
/*
- * We do not merge in flags from ah_fp_status or FPST_AH_F16, because
+ * We do not merge in flags from FPST_AH or FPST_AH_F16, because
* they are used for insns that must not set the cumulative exception bits.
*/
@@ -159,7 +159,7 @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
- set_float_exception_flags(0, &env->vfp.ah_fp_status);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH_F16]);
}
@@ -234,7 +234,7 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
- set_default_nan_mode(dnan_enabled, &env->vfp.ah_fp_status);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
}
if (changed & FPCR_AH) {
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 11/34] target/arm: Remove fp_status_f16_a64
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (9 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 10/34] target/arm: Remove ah_fp_status Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:37 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 12/34] target/arm: Remove fp_status_f16_a32 Richard Henderson
` (22 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Replace with fp_status[FPST_A64_F16].
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 1 -
target/arm/cpu.c | 2 +-
target/arm/tcg/sme_helper.c | 2 +-
target/arm/tcg/vec_helper.c | 8 ++++----
target/arm/vfp_helper.c | 16 ++++++++--------
5 files changed, 14 insertions(+), 15 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 5e3d952588..9e39c8d0d3 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -693,7 +693,6 @@ typedef struct CPUArchState {
float_status fp_status_a32;
float_status fp_status_a64;
float_status fp_status_f16_a32;
- float_status fp_status_f16_a64;
};
};
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 01a0428c6e..4fc1d00d60 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -554,7 +554,7 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
- arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
set_flush_to_zero(1, &env->vfp.fp_status[FPST_AH]);
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index 727c085f37..6e336e10c6 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -1043,7 +1043,7 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
* produces default NaNs. We also need a second copy of fp_status with
* round-to-odd -- see above.
*/
- fpst_f16 = env->vfp.fp_status_f16_a64;
+ fpst_f16 = env->vfp.fp_status[FPST_A64_F16];
fpst_std = env->vfp.fp_status_a64;
set_default_nan_mode(true, &fpst_std);
set_default_nan_mode(true, &fpst_f16);
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 60839ae560..927dece4c1 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2185,7 +2185,7 @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
}
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
@@ -2195,7 +2195,7 @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
float_status *status = &env->vfp.fp_status_a64;
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
+ bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
for (i = 0; i < oprsz; i += sizeof(float32)) {
float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn;
@@ -2247,7 +2247,7 @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64));
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
}
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
@@ -2258,7 +2258,7 @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
float_status *status = &env->vfp.fp_status_a64;
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a64);
+ bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
for (i = 0; i < oprsz; i += 16) {
float16 mm_16 = *(float16 *)(vm + i + idx);
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 78be434caf..bcb05d7ff9 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -126,7 +126,7 @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
& ~float_flag_input_denormal_flushed);
a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
- a64_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a64)
+ a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
/*
* We do not merge in flags from FPST_AH or FPST_AH_F16, because
@@ -156,7 +156,7 @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
set_float_exception_flags(0, &env->vfp.fp_status_a32);
set_float_exception_flags(0, &env->vfp.fp_status_a64);
set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
- set_float_exception_flags(0, &env->vfp.fp_status_f16_a64);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_AH]);
@@ -199,16 +199,16 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
set_float_rounding_mode(i, &env->vfp.fp_status_a64);
set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32);
- set_float_rounding_mode(i, &env->vfp.fp_status_f16_a64);
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
}
if (changed & FPCR_FZ16) {
bool ftz_enabled = val & FPCR_FZ16;
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a64);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
}
@@ -233,7 +233,7 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a64);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
}
@@ -243,10 +243,10 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
if (ah_enabled) {
/* Change behaviours for A64 FP operations */
arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64);
- arm_set_ah_fp_behaviours(&env->vfp.fp_status_f16_a64);
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
} else {
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
- arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a64);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
}
}
/*
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v2 11/34] target/arm: Remove fp_status_f16_a64
2025-01-29 1:38 ` [PATCH v2 11/34] target/arm: Remove fp_status_f16_a64 Richard Henderson
@ 2025-01-29 17:37 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 52+ messages in thread
From: Philippe Mathieu-Daudé @ 2025-01-29 17:37 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell
On 29/1/25 02:38, Richard Henderson wrote:
> Replace with fp_status[FPST_A64_F16].
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/arm/cpu.h | 1 -
> target/arm/cpu.c | 2 +-
> target/arm/tcg/sme_helper.c | 2 +-
> target/arm/tcg/vec_helper.c | 8 ++++----
> target/arm/vfp_helper.c | 16 ++++++++--------
> 5 files changed, 14 insertions(+), 15 deletions(-)
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v2 12/34] target/arm: Remove fp_status_f16_a32
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (10 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 11/34] target/arm: Remove fp_status_f16_a64 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:37 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 13/34] target/arm: Remove fp_status_a64 Richard Henderson
` (21 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Replace with fp_status[FPST_A32_F16].
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 1 -
target/arm/cpu.c | 2 +-
target/arm/tcg/vec_helper.c | 4 ++--
target/arm/vfp_helper.c | 14 +++++++-------
4 files changed, 10 insertions(+), 11 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 9e39c8d0d3..06dbee5725 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -692,7 +692,6 @@ typedef struct CPUArchState {
struct {
float_status fp_status_a32;
float_status fp_status_a64;
- float_status fp_status_f16_a32;
};
};
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 4fc1d00d60..ceb2dcb3fb 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -553,7 +553,7 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
- arm_set_default_fp_behaviours(&env->vfp.fp_status_f16_a32);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD_F16]);
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_AH]);
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 927dece4c1..61f268efad 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2178,7 +2178,7 @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
}
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
@@ -2240,7 +2240,7 @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status_f16_a32));
+ get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
}
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index bcb05d7ff9..6a6eb48530 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -120,7 +120,7 @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
/* FZ16 does not generate an input denormal exception. */
- a32_flags |= (get_float_exception_flags(&env->vfp.fp_status_f16_a32)
+ a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16])
& ~float_flag_input_denormal_flushed);
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
& ~float_flag_input_denormal_flushed);
@@ -155,7 +155,7 @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
*/
set_float_exception_flags(0, &env->vfp.fp_status_a32);
set_float_exception_flags(0, &env->vfp.fp_status_a64);
- set_float_exception_flags(0, &env->vfp.fp_status_f16_a32);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD_F16]);
@@ -198,16 +198,16 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
}
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
set_float_rounding_mode(i, &env->vfp.fp_status_a64);
- set_float_rounding_mode(i, &env->vfp.fp_status_f16_a32);
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
}
if (changed & FPCR_FZ16) {
bool ftz_enabled = val & FPCR_FZ16;
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_f16_a32);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32_F16]);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64_F16]);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_STD_F16]);
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_AH_F16]);
@@ -232,7 +232,7 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
bool dnan_enabled = val & FPCR_DN;
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_f16_a32);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH_F16]);
@@ -494,7 +494,7 @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
softfloat_to_vfp_compare(env, \
FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
}
-DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status_f16_a32)
+DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16])
DO_VFP_cmp(s, float32, float32, fp_status_a32)
DO_VFP_cmp(d, float64, float64, fp_status_a32)
#undef DO_VFP_cmp
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 13/34] target/arm: Remove fp_status_a64
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (11 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 12/34] target/arm: Remove fp_status_f16_a32 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:37 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 14/34] target/arm: Remove fp_status_a32 Richard Henderson
` (20 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Replace with fp_status[FPST_A64].
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 1 -
target/arm/cpu.c | 2 +-
target/arm/tcg/sme_helper.c | 2 +-
target/arm/tcg/vec_helper.c | 10 +++++-----
target/arm/vfp_helper.c | 16 ++++++++--------
5 files changed, 15 insertions(+), 16 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 06dbee5725..05a58de045 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -691,7 +691,6 @@ typedef struct CPUArchState {
float_status fp_status[FPST_COUNT];
struct {
float_status fp_status_a32;
- float_status fp_status_a64;
};
};
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index ceb2dcb3fb..777e5f5dd8 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -551,7 +551,7 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
- arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
index 6e336e10c6..dcc48e43db 100644
--- a/target/arm/tcg/sme_helper.c
+++ b/target/arm/tcg/sme_helper.c
@@ -1044,7 +1044,7 @@ void HELPER(sme_fmopa_h)(void *vza, void *vzn, void *vzm, void *vpn,
* round-to-odd -- see above.
*/
fpst_f16 = env->vfp.fp_status[FPST_A64_F16];
- fpst_std = env->vfp.fp_status_a64;
+ fpst_std = env->vfp.fp_status[FPST_A64];
set_default_nan_mode(true, &fpst_std);
set_default_nan_mode(true, &fpst_f16);
fpst_odd = fpst_std;
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 61f268efad..9ed04b1b0a 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2184,7 +2184,7 @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
- do_fmlal(vd, vn, vm, &env->vfp.fp_status_a64, desc,
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], desc,
get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
}
@@ -2194,7 +2194,7 @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
intptr_t i, oprsz = simd_oprsz(desc);
uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
- float_status *status = &env->vfp.fp_status_a64;
+ float_status *status = &env->vfp.fp_status[FPST_A64];
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
for (i = 0; i < oprsz; i += sizeof(float32)) {
@@ -2246,7 +2246,7 @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status_a64, desc,
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], desc,
get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
}
@@ -2257,7 +2257,7 @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
- float_status *status = &env->vfp.fp_status_a64;
+ float_status *status = &env->vfp.fp_status[FPST_A64];
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
for (i = 0; i < oprsz; i += 16) {
@@ -2936,7 +2936,7 @@ bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp)
*/
bool ebf = is_a64(env) && env->vfp.fpcr & FPCR_EBF;
- *statusp = env->vfp.fp_status_a64;
+ *statusp = env->vfp.fp_status[FPST_A64];
set_default_nan_mode(true, statusp);
if (ebf) {
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index 6a6eb48530..e0d0623097 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -125,7 +125,7 @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_STD_F16])
& ~float_flag_input_denormal_flushed);
- a64_flags |= get_float_exception_flags(&env->vfp.fp_status_a64);
+ a64_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A64]);
a64_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A64_F16])
& ~(float_flag_input_denormal_flushed | float_flag_input_denormal_used));
/*
@@ -154,7 +154,7 @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
* be the architecturally up-to-date exception flag information first.
*/
set_float_exception_flags(0, &env->vfp.fp_status_a32);
- set_float_exception_flags(0, &env->vfp.fp_status_a64);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_STD]);
@@ -197,7 +197,7 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
break;
}
set_float_rounding_mode(i, &env->vfp.fp_status_a32);
- set_float_rounding_mode(i, &env->vfp.fp_status_a64);
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
}
@@ -215,7 +215,7 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
if (changed & FPCR_FZ) {
bool ftz_enabled = val & FPCR_FZ;
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a64);
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
/* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
}
@@ -226,12 +226,12 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
*/
bool fitz_enabled = (val & FPCR_FIZ) ||
(val & (FPCR_FZ | FPCR_AH)) == FPCR_FZ;
- set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status_a64);
+ set_flush_inputs_to_zero(fitz_enabled, &env->vfp.fp_status[FPST_A64]);
}
if (changed & FPCR_DN) {
bool dnan_enabled = val & FPCR_DN;
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a64);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_AH]);
@@ -242,10 +242,10 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
if (ah_enabled) {
/* Change behaviours for A64 FP operations */
- arm_set_ah_fp_behaviours(&env->vfp.fp_status_a64);
+ arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
arm_set_ah_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
} else {
- arm_set_default_fp_behaviours(&env->vfp.fp_status_a64);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64_F16]);
}
}
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v2 13/34] target/arm: Remove fp_status_a64
2025-01-29 1:38 ` [PATCH v2 13/34] target/arm: Remove fp_status_a64 Richard Henderson
@ 2025-01-29 17:37 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 52+ messages in thread
From: Philippe Mathieu-Daudé @ 2025-01-29 17:37 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell
On 29/1/25 02:38, Richard Henderson wrote:
> Replace with fp_status[FPST_A64].
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/arm/cpu.h | 1 -
> target/arm/cpu.c | 2 +-
> target/arm/tcg/sme_helper.c | 2 +-
> target/arm/tcg/vec_helper.c | 10 +++++-----
> target/arm/vfp_helper.c | 16 ++++++++--------
> 5 files changed, 15 insertions(+), 16 deletions(-)
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v2 14/34] target/arm: Remove fp_status_a32
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (12 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 13/34] target/arm: Remove fp_status_a64 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:38 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 15/34] target/arm: Simplify fp_status indexing in mve_helper.c Richard Henderson
` (19 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Replace with fp_status[FPST_A32]. As this was the last of the
old structures, we can remove the anonymous union and struct.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/cpu.h | 7 +------
target/arm/cpu.c | 2 +-
target/arm/vfp_helper.c | 18 +++++++++---------
3 files changed, 11 insertions(+), 16 deletions(-)
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 05a58de045..e6513ef1e5 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -687,12 +687,7 @@ typedef struct CPUArchState {
uint32_t scratch[8];
/* There are a number of distinct float control structures. */
- union {
- float_status fp_status[FPST_COUNT];
- struct {
- float_status fp_status_a32;
- };
- };
+ float_status fp_status[FPST_COUNT];
uint64_t zcr_el[4]; /* ZCR_EL[1-3] */
uint64_t smcr_el[4]; /* SMCR_EL[1-3] */
diff --git a/target/arm/cpu.c b/target/arm/cpu.c
index 777e5f5dd8..180e11c5d7 100644
--- a/target/arm/cpu.c
+++ b/target/arm/cpu.c
@@ -550,7 +550,7 @@ static void arm_cpu_reset_hold(Object *obj, ResetType type)
set_flush_inputs_to_zero(1, &env->vfp.fp_status[FPST_STD]);
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD]);
set_default_nan_mode(1, &env->vfp.fp_status[FPST_STD_F16]);
- arm_set_default_fp_behaviours(&env->vfp.fp_status_a32);
+ arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32]);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A64]);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_STD]);
arm_set_default_fp_behaviours(&env->vfp.fp_status[FPST_A32_F16]);
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index e0d0623097..a2775a2e8d 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -117,7 +117,7 @@ static uint32_t vfp_get_fpsr_from_host(CPUARMState *env)
{
uint32_t a32_flags = 0, a64_flags = 0;
- a32_flags |= get_float_exception_flags(&env->vfp.fp_status_a32);
+ a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_A32]);
a32_flags |= get_float_exception_flags(&env->vfp.fp_status[FPST_STD]);
/* FZ16 does not generate an input denormal exception. */
a32_flags |= (get_float_exception_flags(&env->vfp.fp_status[FPST_A32_F16])
@@ -153,7 +153,7 @@ static void vfp_clear_float_status_exc_flags(CPUARMState *env)
* values. The caller should have arranged for env->vfp.fpsr to
* be the architecturally up-to-date exception flag information first.
*/
- set_float_exception_flags(0, &env->vfp.fp_status_a32);
+ set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A32_F16]);
set_float_exception_flags(0, &env->vfp.fp_status[FPST_A64_F16]);
@@ -196,7 +196,7 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
i = float_round_to_zero;
break;
}
- set_float_rounding_mode(i, &env->vfp.fp_status_a32);
+ set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32]);
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64]);
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A32_F16]);
set_float_rounding_mode(i, &env->vfp.fp_status[FPST_A64_F16]);
@@ -214,10 +214,10 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
}
if (changed & FPCR_FZ) {
bool ftz_enabled = val & FPCR_FZ;
- set_flush_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
+ set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
set_flush_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A64]);
/* FIZ is A64 only so FZ always makes A32 code flush inputs to zero */
- set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status_a32);
+ set_flush_inputs_to_zero(ftz_enabled, &env->vfp.fp_status[FPST_A32]);
}
if (changed & (FPCR_FZ | FPCR_AH | FPCR_FIZ)) {
/*
@@ -230,7 +230,7 @@ static void vfp_set_fpcr_to_host(CPUARMState *env, uint32_t val, uint32_t mask)
}
if (changed & FPCR_DN) {
bool dnan_enabled = val & FPCR_DN;
- set_default_nan_mode(dnan_enabled, &env->vfp.fp_status_a32);
+ set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A32_F16]);
set_default_nan_mode(dnan_enabled, &env->vfp.fp_status[FPST_A64_F16]);
@@ -495,8 +495,8 @@ void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
}
DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16])
-DO_VFP_cmp(s, float32, float32, fp_status_a32)
-DO_VFP_cmp(d, float64, float64, fp_status_a32)
+DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32])
+DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32])
#undef DO_VFP_cmp
/* Integer to float and float to integer conversions */
@@ -1383,7 +1383,7 @@ uint64_t HELPER(fjcvtzs)(float64 value, float_status *status)
uint32_t HELPER(vjcvt)(float64 value, CPUARMState *env)
{
- uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status_a32);
+ uint64_t pair = HELPER(fjcvtzs)(value, &env->vfp.fp_status[FPST_A32]);
uint32_t result = pair;
uint32_t z = (pair >> 32) == 0;
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v2 14/34] target/arm: Remove fp_status_a32
2025-01-29 1:38 ` [PATCH v2 14/34] target/arm: Remove fp_status_a32 Richard Henderson
@ 2025-01-29 17:38 ` Philippe Mathieu-Daudé
0 siblings, 0 replies; 52+ messages in thread
From: Philippe Mathieu-Daudé @ 2025-01-29 17:38 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell
On 29/1/25 02:38, Richard Henderson wrote:
> Replace with fp_status[FPST_A32]. As this was the last of the
> old structures, we can remove the anonymous union and struct.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/arm/cpu.h | 7 +------
> target/arm/cpu.c | 2 +-
> target/arm/vfp_helper.c | 18 +++++++++---------
> 3 files changed, 11 insertions(+), 16 deletions(-)
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v2 15/34] target/arm: Simplify fp_status indexing in mve_helper.c
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (13 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 14/34] target/arm: Remove fp_status_a32 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:39 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 16/34] target/arm: Simplify DO_VFP_cmp in vfp_helper.c Richard Henderson
` (18 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Select on index instead of pointer.
No functional change.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/mve_helper.c | 40 +++++++++++++------------------------
1 file changed, 14 insertions(+), 26 deletions(-)
diff --git a/target/arm/tcg/mve_helper.c b/target/arm/tcg/mve_helper.c
index 3763d71e20..274003e2e5 100644
--- a/target/arm/tcg/mve_helper.c
+++ b/target/arm/tcg/mve_helper.c
@@ -2814,8 +2814,7 @@ DO_VMAXMINA(vminaw, 4, int32_t, uint32_t, DO_MIN)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -2888,8 +2887,7 @@ DO_2OP_FP_ALL(vminnma, minnuma)
r[e] = 0; \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(tm & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -2926,8 +2924,7 @@ DO_VCADD_FP(vfcadd270s, 4, float32, float32_add, float32_sub)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -2964,8 +2961,7 @@ DO_VFMA(vfmss, 4, float32, true)
if ((mask & MAKE_64BIT_MASK(0, ESIZE * 2)) == 0) { \
continue; \
} \
- fpst0 = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ fpst0 = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
fpst1 = fpst0; \
if (!(mask & 1)) { \
scratch_fpst = *fpst0; \
@@ -3049,8 +3045,7 @@ DO_VCMLA(vcmla270s, 4, float32, 3, DO_VCMLAS)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3084,8 +3079,7 @@ DO_2OP_FP_SCALAR_ALL(vfmul_scalar, mul)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3116,9 +3110,8 @@ DO_2OP_FP_ACC_SCALAR(vfmas_scalars, 4, float32, DO_VFMAS_SCALARS)
unsigned e; \
TYPE *m = vm; \
TYPE ra = (TYPE)ra_in; \
- float_status *fpst = (ESIZE == 2) ? \
- &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ float_status *fpst = \
+ &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
if (mask & 1) { \
TYPE v = m[H##ESIZE(e)]; \
@@ -3168,8 +3161,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
if ((mask & emask) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & (1 << (e * ESIZE)))) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3202,8 +3194,7 @@ DO_FP_VMAXMINV(vminnmavs, 4, float32, true, float32_minnum)
if ((mask & emask) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & (1 << (e * ESIZE)))) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3267,8 +3258,7 @@ DO_VCMP_FP_BOTH(vfcmples, vfcmple_scalars, 4, float32, !DO_GT32)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
@@ -3300,9 +3290,8 @@ DO_VCVT_FIXED(vcvt_fu, 4, uint32_t, helper_vfp_touls_round_to_zero)
unsigned e; \
float_status *fpst; \
float_status scratch_fpst; \
- float_status *base_fpst = (ESIZE == 2) ? \
- &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ float_status *base_fpst = \
+ &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
uint32_t prev_rmode = get_float_rounding_mode(base_fpst); \
set_float_rounding_mode(rmode, base_fpst); \
for (e = 0; e < 16 / ESIZE; e++, mask >>= ESIZE) { \
@@ -3427,8 +3416,7 @@ void HELPER(mve_vcvtt_hs)(CPUARMState *env, void *vd, void *vm)
if ((mask & MAKE_64BIT_MASK(0, ESIZE)) == 0) { \
continue; \
} \
- fpst = (ESIZE == 2) ? &env->vfp.fp_status[FPST_STD_F16] : \
- &env->vfp.fp_status[FPST_STD]; \
+ fpst = &env->vfp.fp_status[ESIZE == 2 ? FPST_STD_F16 : FPST_STD]; \
if (!(mask & 1)) { \
/* We need the result but without updating flags */ \
scratch_fpst = *fpst; \
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 16/34] target/arm: Simplify DO_VFP_cmp in vfp_helper.c
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (14 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 15/34] target/arm: Simplify fp_status indexing in mve_helper.c Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 17:40 ` Philippe Mathieu-Daudé
2025-01-29 1:38 ` [PATCH v2 17/34] target/arm: Move float*_ah_chs to vec_internal.h Richard Henderson
` (17 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Pass ARMFPStatusFlavour index instead of fp_status[FOO].
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/vfp_helper.c | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/target/arm/vfp_helper.c b/target/arm/vfp_helper.c
index a2775a2e8d..4e242275e7 100644
--- a/target/arm/vfp_helper.c
+++ b/target/arm/vfp_helper.c
@@ -487,16 +487,16 @@ static void softfloat_to_vfp_compare(CPUARMState *env, FloatRelation cmp)
void VFP_HELPER(cmp, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
{ \
softfloat_to_vfp_compare(env, \
- FLOATTYPE ## _compare_quiet(a, b, &env->vfp.FPST)); \
+ FLOATTYPE ## _compare_quiet(a, b, &env->vfp.fp_status[FPST])); \
} \
void VFP_HELPER(cmpe, P)(ARGTYPE a, ARGTYPE b, CPUARMState *env) \
{ \
softfloat_to_vfp_compare(env, \
- FLOATTYPE ## _compare(a, b, &env->vfp.FPST)); \
+ FLOATTYPE ## _compare(a, b, &env->vfp.fp_status[FPST])); \
}
-DO_VFP_cmp(h, float16, dh_ctype_f16, fp_status[FPST_A32_F16])
-DO_VFP_cmp(s, float32, float32, fp_status[FPST_A32])
-DO_VFP_cmp(d, float64, float64, fp_status[FPST_A32])
+DO_VFP_cmp(h, float16, dh_ctype_f16, FPST_A32_F16)
+DO_VFP_cmp(s, float32, float32, FPST_A32)
+DO_VFP_cmp(d, float64, float64, FPST_A32)
#undef DO_VFP_cmp
/* Integer to float and float to integer conversions */
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 17/34] target/arm: Move float*_ah_chs to vec_internal.h
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (15 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 16/34] target/arm: Simplify DO_VFP_cmp in vfp_helper.c Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 18/34] target/arm: Introduce float*_maybe_ah_chs Richard Henderson
` (16 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/vec_internal.h | 20 ++++++++++++++++++++
target/arm/tcg/helper-a64.c | 15 +--------------
2 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
index 094f5c169c..a673935f48 100644
--- a/target/arm/tcg/vec_internal.h
+++ b/target/arm/tcg/vec_internal.h
@@ -20,6 +20,8 @@
#ifndef TARGET_ARM_VEC_INTERNAL_H
#define TARGET_ARM_VEC_INTERNAL_H
+#include "fpu/softfloat.h"
+
/*
* Note that vector data is stored in host-endian 64-bit chunks,
* so addressing units smaller than that needs a host-endian fixup.
@@ -265,4 +267,22 @@ float32 bfdotadd_ebf(float32 sum, uint32_t e1, uint32_t e2,
*/
bool is_ebf(CPUARMState *env, float_status *statusp, float_status *oddstatusp);
+/*
+ * Negate as for FPCR.AH=1 -- do not negate NaNs.
+ */
+static inline float16 float16_ah_chs(float16 a)
+{
+ return float16_is_any_nan(a) ? a : float16_chs(a);
+}
+
+static inline float32 float32_ah_chs(float32 a)
+{
+ return float32_is_any_nan(a) ? a : float32_chs(a);
+}
+
+static inline float64 float64_ah_chs(float64 a)
+{
+ return float64_is_any_nan(a) ? a : float64_chs(a);
+}
+
#endif /* TARGET_ARM_VEC_INTERNAL_H */
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index ba21efd0bb..dc96c92a21 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -38,6 +38,7 @@
#ifdef CONFIG_USER_ONLY
#include "user/page-protection.h"
#endif
+#include "vec_internal.h"
/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
@@ -208,20 +209,6 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, float_status *fpst)
return -float64_lt(b, a, fpst);
}
-static float16 float16_ah_chs(float16 a)
-{
- return float16_is_any_nan(a) ? a : float16_chs(a);
-}
-
-static float32 float32_ah_chs(float32 a)
-{
- return float32_is_any_nan(a) ? a : float32_chs(a);
-}
-
-static float64 float64_ah_chs(float64 a)
-{
- return float64_is_any_nan(a) ? a : float64_chs(a);
-}
/*
* Reciprocal step and sqrt step. Note that unlike the A32/T32
* versions, these do a fully fused multiply-add or
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 18/34] target/arm: Introduce float*_maybe_ah_chs
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (16 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 17/34] target/arm: Move float*_ah_chs to vec_internal.h Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 19/34] target/arm: Use float*_maybe_ah_chs in sve_ftssel_* Richard Henderson
` (15 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Add versions of float*_ah_chs which take fpcr_ah.
These will help simplify some usages.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/vec_internal.h | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git a/target/arm/tcg/vec_internal.h b/target/arm/tcg/vec_internal.h
index a673935f48..6b93b5aeb9 100644
--- a/target/arm/tcg/vec_internal.h
+++ b/target/arm/tcg/vec_internal.h
@@ -285,4 +285,19 @@ static inline float64 float64_ah_chs(float64 a)
return float64_is_any_nan(a) ? a : float64_chs(a);
}
+static inline float16 float16_maybe_ah_chs(float16 a, bool fpcr_ah)
+{
+ return fpcr_ah && float16_is_any_nan(a) ? a : float16_chs(a);
+}
+
+static inline float32 float32_maybe_ah_chs(float32 a, bool fpcr_ah)
+{
+ return fpcr_ah && float32_is_any_nan(a) ? a : float32_chs(a);
+}
+
+static inline float64 float64_maybe_ah_chs(float64 a, bool fpcr_ah)
+{
+ return fpcr_ah && float64_is_any_nan(a) ? a : float64_chs(a);
+}
+
#endif /* TARGET_ARM_VEC_INTERNAL_H */
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 19/34] target/arm: Use float*_maybe_ah_chs in sve_ftssel_*
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (17 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 18/34] target/arm: Introduce float*_maybe_ah_chs Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 20/34] target/arm: Use float*_maybe_ah_chs in sve_fcadd_* Richard Henderson
` (14 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/sve_helper.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index 3f38e07829..a2ff3b7f11 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -2563,8 +2563,8 @@ void HELPER(sve_ftssel_h)(void *vd, void *vn, void *vm, uint32_t desc)
if (mm & 1) {
nn = float16_one;
}
- if ((mm & 2) && !(fpcr_ah && float16_is_any_nan(nn))) {
- nn ^= (1 << 15);
+ if (mm & 2) {
+ nn = float16_maybe_ah_chs(nn, fpcr_ah);
}
d[i] = nn;
}
@@ -2581,8 +2581,8 @@ void HELPER(sve_ftssel_s)(void *vd, void *vn, void *vm, uint32_t desc)
if (mm & 1) {
nn = float32_one;
}
- if ((mm & 2) && !(fpcr_ah && float32_is_any_nan(nn))) {
- nn ^= (1U << 31);
+ if (mm & 2) {
+ nn = float32_maybe_ah_chs(nn, fpcr_ah);
}
d[i] = nn;
}
@@ -2599,8 +2599,8 @@ void HELPER(sve_ftssel_d)(void *vd, void *vn, void *vm, uint32_t desc)
if (mm & 1) {
nn = float64_one;
}
- if ((mm & 2) && !(fpcr_ah && float64_is_any_nan(nn))) {
- nn ^= (1ULL << 63);
+ if (mm & 2) {
+ nn = float64_maybe_ah_chs(nn, fpcr_ah);
}
d[i] = nn;
}
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 20/34] target/arm: Use float*_maybe_ah_chs in sve_fcadd_*
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (18 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 19/34] target/arm: Use float*_maybe_ah_chs in sve_ftssel_* Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 21/34] " Richard Henderson
` (13 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
The construction of neg_imag and neg_real was done to make it easy
to apply both in parallel with two simple logical operations. This
changed with FPCR.AH, which is more complex than that.
Note that there was a naming issue with neg_imag and neg_real.
They were named backward, with neg_imag being non-zero for rot=1,
and vice versa. This was combined with reversed usage within the
loop, so that the negation in the end turned out correct.
Using the rot variable introduced with fpcr_ah, it's easier to
match the pseudocode for the instruction.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/sve_helper.c | 33 ++++++++++++---------------------
1 file changed, 12 insertions(+), 21 deletions(-)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index a2ff3b7f11..a1f7743221 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -5226,8 +5226,6 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
uint64_t *g = vg;
bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- float16 neg_imag = float16_set_sign(0, rot);
- float16 neg_real = float16_chs(neg_imag);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5243,11 +5241,10 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm, void *vg,
e2 = *(float16 *)(vn + H1_2(j));
e3 = *(float16 *)(vm + H1_2(i));
- if (neg_real && !(fpcr_ah && float16_is_any_nan(e1))) {
- e1 ^= neg_real;
- }
- if (neg_imag && !(fpcr_ah && float16_is_any_nan(e3))) {
- e3 ^= neg_imag;
+ if (rot) {
+ e3 = float16_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float16_maybe_ah_chs(e1, fpcr_ah);
}
if (likely((pg >> (i & 63)) & 1)) {
@@ -5267,8 +5264,6 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
uint64_t *g = vg;
bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- float32 neg_imag = float32_set_sign(0, rot);
- float32 neg_real = float32_chs(neg_imag);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5284,11 +5279,10 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm, void *vg,
e2 = *(float32 *)(vn + H1_2(j));
e3 = *(float32 *)(vm + H1_2(i));
- if (neg_real && !(fpcr_ah && float32_is_any_nan(e1))) {
- e1 ^= neg_real;
- }
- if (neg_imag && !(fpcr_ah && float32_is_any_nan(e3))) {
- e3 ^= neg_imag;
+ if (rot) {
+ e3 = float32_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float32_maybe_ah_chs(e1, fpcr_ah);
}
if (likely((pg >> (i & 63)) & 1)) {
@@ -5308,8 +5302,6 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
uint64_t *g = vg;
bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- float64 neg_imag = float64_set_sign(0, rot);
- float64 neg_real = float64_chs(neg_imag);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5325,11 +5317,10 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm, void *vg,
e2 = *(float64 *)(vn + H1_2(j));
e3 = *(float64 *)(vm + H1_2(i));
- if (neg_real && !(fpcr_ah && float64_is_any_nan(e1))) {
- e1 ^= neg_real;
- }
- if (neg_imag && !(fpcr_ah && float64_is_any_nan(e3))) {
- e3 ^= neg_imag;
+ if (rot) {
+ e3 = float64_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float64_maybe_ah_chs(e1, fpcr_ah);
}
if (likely((pg >> (i & 63)) & 1)) {
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 21/34] target/arm: Use float*_maybe_ah_chs in sve_fcadd_*
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (19 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 20/34] target/arm: Use float*_maybe_ah_chs in sve_fcadd_* Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 22/34] target/arm: Use flags for AH negation in do_fmla_zpzzz_* Richard Henderson
` (12 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
The construction of neg_imag and neg_real was done to make it easy
to apply both in parallel with two simple logical operations. This
changed with FPCR.AH, which is more complex than that.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/vec_helper.c | 51 +++++++++++--------------------------
1 file changed, 15 insertions(+), 36 deletions(-)
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 9ed04b1b0a..55bac9536f 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -879,27 +879,20 @@ void HELPER(gvec_fcaddh)(void *vd, void *vn, void *vm,
float16 *d = vd;
float16 *n = vn;
float16 *m = vm;
- uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = neg_real ^ 1;
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 15;
- neg_imag <<= 15;
-
for (i = 0; i < opr_sz / 2; i += 2) {
float16 e0 = n[H2(i)];
float16 e1 = m[H2(i + 1)];
float16 e2 = n[H2(i + 1)];
float16 e3 = m[H2(i)];
- /* FPNeg() mustn't flip sign of a NaN if FPCR.AH == 1 */
- if (!(fpcr_ah && float16_is_any_nan(e1))) {
- e1 ^= neg_imag;
- }
- if (!(fpcr_ah && float16_is_any_nan(e3))) {
- e3 ^= neg_real;
+ if (rot) {
+ e3 = float16_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float16_maybe_ah_chs(e1, fpcr_ah);
}
d[H2(i)] = float16_add(e0, e1, fpst);
@@ -915,27 +908,20 @@ void HELPER(gvec_fcadds)(void *vd, void *vn, void *vm,
float32 *d = vd;
float32 *n = vn;
float32 *m = vm;
- uint32_t neg_real = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = neg_real ^ 1;
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 31;
- neg_imag <<= 31;
-
for (i = 0; i < opr_sz / 4; i += 2) {
float32 e0 = n[H4(i)];
float32 e1 = m[H4(i + 1)];
float32 e2 = n[H4(i + 1)];
float32 e3 = m[H4(i)];
- /* FPNeg() mustn't flip sign of a NaN if FPCR.AH == 1 */
- if (!(fpcr_ah && float32_is_any_nan(e1))) {
- e1 ^= neg_imag;
- }
- if (!(fpcr_ah && float32_is_any_nan(e3))) {
- e3 ^= neg_real;
+ if (rot) {
+ e3 = float32_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float32_maybe_ah_chs(e1, fpcr_ah);
}
d[H4(i)] = float32_add(e0, e1, fpst);
@@ -951,27 +937,20 @@ void HELPER(gvec_fcaddd)(void *vd, void *vn, void *vm,
float64 *d = vd;
float64 *n = vn;
float64 *m = vm;
- uint64_t neg_real = extract64(desc, SIMD_DATA_SHIFT, 1);
- uint64_t neg_imag = neg_real ^ 1;
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
bool fpcr_ah = extract64(desc, SIMD_DATA_SHIFT + 1, 1);
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 63;
- neg_imag <<= 63;
-
for (i = 0; i < opr_sz / 8; i += 2) {
float64 e0 = n[i];
float64 e1 = m[i + 1];
float64 e2 = n[i + 1];
float64 e3 = m[i];
- /* FPNeg() mustn't flip sign of a NaN if FPCR.AH == 1 */
- if (!(fpcr_ah && float64_is_any_nan(e1))) {
- e1 ^= neg_imag;
- }
- if (!(fpcr_ah && float64_is_any_nan(e3))) {
- e3 ^= neg_real;
+ if (rot) {
+ e3 = float64_maybe_ah_chs(e3, fpcr_ah);
+ } else {
+ e1 = float64_maybe_ah_chs(e1, fpcr_ah);
}
d[i] = float64_add(e0, e1, fpst);
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 22/34] target/arm: Use flags for AH negation in do_fmla_zpzzz_*
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (20 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 21/34] " Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 23/34] target/arm: Use flags for AH negation in sve_ftmad_* Richard Henderson
` (11 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
The float*_muladd functions have a flags argument that can
perform optional negation of various operands. We don't use
that for "normal" arm fmla, because the muladd flags are not
applied when an input is a NaN. But since FEAT_AFP does not
negate NaNs, this behaviour is exactly what we need.
Since we have separate helper entry points for the various
fmla, fmls, fnmla, fnmls instructions, it's easy to just
pass down the exact values required so that no conditional
branch is required within the inner loop.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/sve_helper.c | 93 +++++++++++++++++--------------------
1 file changed, 42 insertions(+), 51 deletions(-)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index a1f7743221..a01613f079 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -4814,7 +4814,7 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint16_t neg1, uint16_t neg3, bool fpcr_ah)
+ uint16_t neg1, uint16_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4826,16 +4826,10 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float16 e1, e2, e3, r;
- e1 = *(uint16_t *)(vn + H1_2(i));
+ e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
e2 = *(uint16_t *)(vm + H1_2(i));
- e3 = *(uint16_t *)(va + H1_2(i));
- if (neg1 && !(fpcr_ah && float16_is_any_nan(e1))) {
- e1 ^= neg1;
- }
- if (neg3 && !(fpcr_ah && float16_is_any_nan(e3))) {
- e3 ^= neg3;
- }
- r = float16_muladd(e1, e2, e3, 0, status);
+ e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
+ r = float16_muladd(e1, e2, e3, flags, status);
*(uint16_t *)(vd + H1_2(i)) = r;
}
} while (i & 63);
@@ -4845,48 +4839,51 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, false);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, false);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, false);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0);
}
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, false);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0);
}
void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, true);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
}
void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, true);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
}
void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, true);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint32_t neg1, uint32_t neg3, bool fpcr_ah)
+ uint32_t neg1, uint32_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4898,16 +4895,10 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float32 e1, e2, e3, r;
- e1 = *(uint32_t *)(vn + H1_4(i));
+ e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
e2 = *(uint32_t *)(vm + H1_4(i));
- e3 = *(uint32_t *)(va + H1_4(i));
- if (neg1 && !(fpcr_ah && float32_is_any_nan(e1))) {
- e1 ^= neg1;
- }
- if (neg3 && !(fpcr_ah && float32_is_any_nan(e3))) {
- e3 ^= neg3;
- }
- r = float32_muladd(e1, e2, e3, 0, status);
+ e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
+ r = float32_muladd(e1, e2, e3, flags, status);
*(uint32_t *)(vd + H1_4(i)) = r;
}
} while (i & 63);
@@ -4917,48 +4908,51 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, false);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, false);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, false);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, 0);
}
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, false);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0);
}
void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, true);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
}
void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, true);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
}
void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, true);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint64_t neg1, uint64_t neg3, bool fpcr_ah)
+ uint64_t neg1, uint64_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4970,16 +4964,10 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float64 e1, e2, e3, r;
- e1 = *(uint64_t *)(vn + i);
+ e1 = *(uint64_t *)(vn + i) ^ neg1;
e2 = *(uint64_t *)(vm + i);
- e3 = *(uint64_t *)(va + i);
- if (neg1 && !(fpcr_ah && float64_is_any_nan(e1))) {
- e1 ^= neg1;
- }
- if (neg3 && !(fpcr_ah && float64_is_any_nan(e3))) {
- e3 ^= neg3;
- }
- r = float64_muladd(e1, e2, e3, 0, status);
+ e3 = *(uint64_t *)(va + i) ^ neg3;
+ r = float64_muladd(e1, e2, e3, flags, status);
*(uint64_t *)(vd + i) = r;
}
} while (i & 63);
@@ -4989,43 +4977,46 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, false);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, false);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, false);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0);
}
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, false);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0);
}
void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, true);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
}
void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, true);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
}
void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, true);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
/* Two operand floating-point comparison controlled by a predicate.
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 23/34] target/arm: Use flags for AH negation in sve_ftmad_*
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (21 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 22/34] target/arm: Use flags for AH negation in do_fmla_zpzzz_* Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 24/34] target/arm: Use flags for AH negation in float*_ah_mulsub_f Richard Henderson
` (10 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Because the operand is known to be negative, negating the operand
is the same as taking the absolute value. Defer this to the muladd
operation via flags, so that it happens after NaN detection, which
is correct for FPCR.AH.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/sve_helper.c | 27 +++++++++++++++++++++------
1 file changed, 21 insertions(+), 6 deletions(-)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index a01613f079..c12b2600bd 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -5137,16 +5137,21 @@ void HELPER(sve_ftmad_h)(void *vd, void *vn, void *vm,
intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
float16 *d = vd, *n = vn, *m = vm;
+
for (i = 0; i < opr_sz; i++) {
float16 mm = m[i];
intptr_t xx = x;
+ int flags = 0;
+
if (float16_is_neg(mm)) {
- if (!(fpcr_ah && float16_is_any_nan(mm))) {
+ if (fpcr_ah) {
+ flags = float_muladd_negate_product;
+ } else {
mm = float16_abs(mm);
}
xx += 8;
}
- d[i] = float16_muladd(n[i], mm, coeff[xx], 0, s);
+ d[i] = float16_muladd(n[i], mm, coeff[xx], flags, s);
}
}
@@ -5163,16 +5168,21 @@ void HELPER(sve_ftmad_s)(void *vd, void *vn, void *vm,
intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
float32 *d = vd, *n = vn, *m = vm;
+
for (i = 0; i < opr_sz; i++) {
float32 mm = m[i];
intptr_t xx = x;
+ int flags = 0;
+
if (float32_is_neg(mm)) {
- if (!(fpcr_ah && float32_is_any_nan(mm))) {
+ if (fpcr_ah) {
+ flags = float_muladd_negate_product;
+ } else {
mm = float32_abs(mm);
}
xx += 8;
}
- d[i] = float32_muladd(n[i], mm, coeff[xx], 0, s);
+ d[i] = float32_muladd(n[i], mm, coeff[xx], flags, s);
}
}
@@ -5193,16 +5203,21 @@ void HELPER(sve_ftmad_d)(void *vd, void *vn, void *vm,
intptr_t x = extract32(desc, SIMD_DATA_SHIFT, 3);
bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 3, 1);
float64 *d = vd, *n = vn, *m = vm;
+
for (i = 0; i < opr_sz; i++) {
float64 mm = m[i];
intptr_t xx = x;
+ int flags = 0;
+
if (float64_is_neg(mm)) {
- if (!(fpcr_ah && float64_is_any_nan(mm))) {
+ if (fpcr_ah) {
+ flags = float_muladd_negate_product;
+ } else {
mm = float64_abs(mm);
}
xx += 8;
}
- d[i] = float64_muladd(n[i], mm, coeff[xx], 0, s);
+ d[i] = float64_muladd(n[i], mm, coeff[xx], flags, s);
}
}
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 24/34] target/arm: Use flags for AH negation in float*_ah_mulsub_f
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (22 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 23/34] target/arm: Use flags for AH negation in sve_ftmad_* Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 25/34] target/arm: Handle FPCR.AH in gvec_fcmla[hsd] Richard Henderson
` (9 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
The float_muladd_negate_product flag produces the same result
as negating either of the multiplication operands, assuming
neither of the operands is a NaN. But since FEAT_AFP does not
negate NaNs, this behaviour is exactly what we need.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/vec_helper.c | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 55bac9536f..5c1e84bf27 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -1563,22 +1563,19 @@ static float64 float64_mulsub_f(float64 dest, float64 op1, float64 op2,
static float16 float16_ah_mulsub_f(float16 dest, float16 op1, float16 op2,
float_status *stat)
{
- op1 = float16_is_any_nan(op1) ? op1 : float16_chs(op1);
- return float16_muladd(op1, op2, dest, 0, stat);
+ return float16_muladd(op1, op2, dest, float_muladd_negate_product, stat);
}
static float32 float32_ah_mulsub_f(float32 dest, float32 op1, float32 op2,
float_status *stat)
{
- op1 = float32_is_any_nan(op1) ? op1 : float32_chs(op1);
- return float32_muladd(op1, op2, dest, 0, stat);
+ return float32_muladd(op1, op2, dest, float_muladd_negate_product, stat);
}
static float64 float64_ah_mulsub_f(float64 dest, float64 op1, float64 op2,
float_status *stat)
{
- op1 = float64_is_any_nan(op1) ? op1 : float64_chs(op1);
- return float64_muladd(op1, op2, dest, 0, stat);
+ return float64_muladd(op1, op2, dest, float_muladd_negate_product, stat);
}
#define DO_MULADD(NAME, FUNC, TYPE) \
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 25/34] target/arm: Handle FPCR.AH in gvec_fcmla[hsd]
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (23 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 24/34] target/arm: Use flags for AH negation in float*_ah_mulsub_f Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 26/34] target/arm: Handle FPCR.AH in gvec_fcmla[hs]_idx Richard Henderson
` (8 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate-a64.c | 2 +-
target/arm/tcg/vec_helper.c | 66 ++++++++++++++++++++--------------
2 files changed, 40 insertions(+), 28 deletions(-)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 715760a17b..3748f7d145 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -6175,7 +6175,7 @@ static bool trans_FCMLA_v(DisasContext *s, arg_FCMLA_v *a)
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
- a->rot, fn[a->esz]);
+ a->rot | (s->fpcr_ah << 2), fn[a->esz]);
return true;
}
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 5c1e84bf27..76637d072d 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -965,22 +965,26 @@ void HELPER(gvec_fcmlah)(void *vd, void *vn, void *vm, void *va,
uintptr_t opr_sz = simd_oprsz(desc);
float16 *d = vd, *n = vn, *m = vm, *a = va;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- uint32_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float16 negx_imag, negx_real;
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 15;
- neg_imag <<= 15;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 15;
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < opr_sz / 2; i += 2) {
float16 e2 = n[H2(i + flip)];
- float16 e1 = m[H2(i + flip)] ^ neg_real;
+ float16 e1 = m[H2(i + flip)] ^ negx_real;
float16 e4 = e2;
- float16 e3 = m[H2(i + 1 - flip)] ^ neg_imag;
+ float16 e3 = m[H2(i + 1 - flip)] ^ negx_imag;
- d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], 0, fpst);
- d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], 0, fpst);
+ d[H2(i)] = float16_muladd(e2, e1, a[H2(i)], negf_real, fpst);
+ d[H2(i + 1)] = float16_muladd(e4, e3, a[H2(i + 1)], negf_imag, fpst);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
@@ -1025,22 +1029,26 @@ void HELPER(gvec_fcmlas)(void *vd, void *vn, void *vm, void *va,
uintptr_t opr_sz = simd_oprsz(desc);
float32 *d = vd, *n = vn, *m = vm, *a = va;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- uint32_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float32 negx_imag, negx_real;
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 31;
- neg_imag <<= 31;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 31;
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < opr_sz / 4; i += 2) {
float32 e2 = n[H4(i + flip)];
- float32 e1 = m[H4(i + flip)] ^ neg_real;
+ float32 e1 = m[H4(i + flip)] ^ negx_real;
float32 e4 = e2;
- float32 e3 = m[H4(i + 1 - flip)] ^ neg_imag;
+ float32 e3 = m[H4(i + 1 - flip)] ^ negx_imag;
- d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], 0, fpst);
- d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], 0, fpst);
+ d[H4(i)] = float32_muladd(e2, e1, a[H4(i)], negf_real, fpst);
+ d[H4(i + 1)] = float32_muladd(e4, e3, a[H4(i + 1)], negf_imag, fpst);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
@@ -1085,22 +1093,26 @@ void HELPER(gvec_fcmlad)(void *vd, void *vn, void *vm, void *va,
uintptr_t opr_sz = simd_oprsz(desc);
float64 *d = vd, *n = vn, *m = vm, *a = va;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint64_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- uint64_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float64 negx_real, negx_imag;
uintptr_t i;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 63;
- neg_imag <<= 63;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63;
+ negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < opr_sz / 8; i += 2) {
float64 e2 = n[i + flip];
- float64 e1 = m[i + flip] ^ neg_real;
+ float64 e1 = m[i + flip] ^ negx_real;
float64 e4 = e2;
- float64 e3 = m[i + 1 - flip] ^ neg_imag;
+ float64 e3 = m[i + 1 - flip] ^ negx_imag;
- d[i] = float64_muladd(e2, e1, a[i], 0, fpst);
- d[i + 1] = float64_muladd(e4, e3, a[i + 1], 0, fpst);
+ d[i] = float64_muladd(e2, e1, a[i], negf_real, fpst);
+ d[i + 1] = float64_muladd(e4, e3, a[i + 1], negf_imag, fpst);
}
clear_tail(d, opr_sz, simd_maxsz(desc));
}
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 26/34] target/arm: Handle FPCR.AH in gvec_fcmla[hs]_idx
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (24 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 25/34] target/arm: Handle FPCR.AH in gvec_fcmla[hsd] Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 27/34] target/arm: Handle FPCR.AH in sve_fcmla_zpzzz_* Richard Henderson
` (7 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate-a64.c | 2 +-
target/arm/tcg/vec_helper.c | 44 ++++++++++++++++++++--------------
2 files changed, 27 insertions(+), 19 deletions(-)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 3748f7d145..9e751e737a 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -6922,7 +6922,7 @@ static bool trans_FCMLA_vi(DisasContext *s, arg_FCMLA_vi *a)
if (fp_access_check(s)) {
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64,
- (a->idx << 2) | a->rot, fn);
+ (s->fpcr_ah << 4) | (a->idx << 2) | a->rot, fn);
}
return true;
}
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 76637d072d..964000773a 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -995,29 +995,33 @@ void HELPER(gvec_fcmlah_idx)(void *vd, void *vn, void *vm, void *va,
uintptr_t opr_sz = simd_oprsz(desc);
float16 *d = vd, *n = vn, *m = vm, *a = va;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
- uint32_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
+ uint32_t negf_real = flip ^ negf_imag;
intptr_t elements = opr_sz / sizeof(float16);
intptr_t eltspersegment = MIN(16 / sizeof(float16), elements);
+ float16 negx_imag, negx_real;
intptr_t i, j;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 15;
- neg_imag <<= 15;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 15;
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < elements; i += eltspersegment) {
float16 mr = m[H2(i + 2 * index + 0)];
float16 mi = m[H2(i + 2 * index + 1)];
- float16 e1 = neg_real ^ (flip ? mi : mr);
- float16 e3 = neg_imag ^ (flip ? mr : mi);
+ float16 e1 = negx_real ^ (flip ? mi : mr);
+ float16 e3 = negx_imag ^ (flip ? mr : mi);
for (j = i; j < i + eltspersegment; j += 2) {
float16 e2 = n[H2(j + flip)];
float16 e4 = e2;
- d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], 0, fpst);
- d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], 0, fpst);
+ d[H2(j)] = float16_muladd(e2, e1, a[H2(j)], negf_real, fpst);
+ d[H2(j + 1)] = float16_muladd(e4, e3, a[H2(j + 1)], negf_imag, fpst);
}
}
clear_tail(d, opr_sz, simd_maxsz(desc));
@@ -1059,29 +1063,33 @@ void HELPER(gvec_fcmlas_idx)(void *vd, void *vn, void *vm, void *va,
uintptr_t opr_sz = simd_oprsz(desc);
float32 *d = vd, *n = vn, *m = vm, *a = va;
intptr_t flip = extract32(desc, SIMD_DATA_SHIFT, 1);
- uint32_t neg_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
intptr_t index = extract32(desc, SIMD_DATA_SHIFT + 2, 2);
- uint32_t neg_real = flip ^ neg_imag;
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 4, 1);
+ uint32_t negf_real = flip ^ negf_imag;
intptr_t elements = opr_sz / sizeof(float32);
intptr_t eltspersegment = MIN(16 / sizeof(float32), elements);
+ float32 negx_imag, negx_real;
intptr_t i, j;
- /* Shift boolean to the sign bit so we can xor to negate. */
- neg_real <<= 31;
- neg_imag <<= 31;
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 31;
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
for (i = 0; i < elements; i += eltspersegment) {
float32 mr = m[H4(i + 2 * index + 0)];
float32 mi = m[H4(i + 2 * index + 1)];
- float32 e1 = neg_real ^ (flip ? mi : mr);
- float32 e3 = neg_imag ^ (flip ? mr : mi);
+ float32 e1 = negx_real ^ (flip ? mi : mr);
+ float32 e3 = negx_imag ^ (flip ? mr : mi);
for (j = i; j < i + eltspersegment; j += 2) {
float32 e2 = n[H4(j + flip)];
float32 e4 = e2;
- d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], 0, fpst);
- d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], 0, fpst);
+ d[H4(j)] = float32_muladd(e2, e1, a[H4(j)], negf_real, fpst);
+ d[H4(j + 1)] = float32_muladd(e4, e3, a[H4(j + 1)], negf_imag, fpst);
}
}
clear_tail(d, opr_sz, simd_maxsz(desc));
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 27/34] target/arm: Handle FPCR.AH in sve_fcmla_zpzzz_*
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (25 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 26/34] target/arm: Handle FPCR.AH in gvec_fcmla[hs]_idx Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 28/34] target/arm: Split gvec_fmla_idx_* for fmls and ah_fmls Richard Henderson
` (6 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/sve_helper.c | 69 +++++++++++++++++++++-------------
target/arm/tcg/translate-sve.c | 2 +-
2 files changed, 43 insertions(+), 28 deletions(-)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index c12b2600bd..c206ca65ce 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -5347,13 +5347,18 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
- unsigned rot = simd_data(desc);
- bool flip = rot & 1;
- float16 neg_imag, neg_real;
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float16 negx_imag, negx_real;
uint64_t *g = vg;
- neg_imag = float16_set_sign(0, (rot & 2) != 0);
- neg_real = float16_set_sign(0, rot == 1 || rot == 2);
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 15;
+ negx_imag = (negf_imag & ~fpcr_ah) << 15;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5370,18 +5375,18 @@ void HELPER(sve_fcmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
mi = *(float16 *)(vm + H1_2(j));
e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
+ e1 = (flip ? mi : mr) ^ negx_real;
e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
+ e3 = (flip ? mr : mi) ^ negx_imag;
if (likely((pg >> (i & 63)) & 1)) {
d = *(float16 *)(va + H1_2(i));
- d = float16_muladd(e2, e1, d, 0, status);
+ d = float16_muladd(e2, e1, d, negf_real, status);
*(float16 *)(vd + H1_2(i)) = d;
}
if (likely((pg >> (j & 63)) & 1)) {
d = *(float16 *)(va + H1_2(j));
- d = float16_muladd(e4, e3, d, 0, status);
+ d = float16_muladd(e4, e3, d, negf_imag, status);
*(float16 *)(vd + H1_2(j)) = d;
}
} while (i & 63);
@@ -5392,13 +5397,18 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
- unsigned rot = simd_data(desc);
- bool flip = rot & 1;
- float32 neg_imag, neg_real;
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float32 negx_imag, negx_real;
uint64_t *g = vg;
- neg_imag = float32_set_sign(0, (rot & 2) != 0);
- neg_real = float32_set_sign(0, rot == 1 || rot == 2);
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (negf_real & ~fpcr_ah) << 31;
+ negx_imag = (negf_imag & ~fpcr_ah) << 31;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5415,18 +5425,18 @@ void HELPER(sve_fcmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
mi = *(float32 *)(vm + H1_2(j));
e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
+ e1 = (flip ? mi : mr) ^ negx_real;
e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
+ e3 = (flip ? mr : mi) ^ negx_imag;
if (likely((pg >> (i & 63)) & 1)) {
d = *(float32 *)(va + H1_2(i));
- d = float32_muladd(e2, e1, d, 0, status);
+ d = float32_muladd(e2, e1, d, negf_real, status);
*(float32 *)(vd + H1_2(i)) = d;
}
if (likely((pg >> (j & 63)) & 1)) {
d = *(float32 *)(va + H1_2(j));
- d = float32_muladd(e4, e3, d, 0, status);
+ d = float32_muladd(e4, e3, d, negf_imag, status);
*(float32 *)(vd + H1_2(j)) = d;
}
} while (i & 63);
@@ -5437,13 +5447,18 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
intptr_t j, i = simd_oprsz(desc);
- unsigned rot = simd_data(desc);
- bool flip = rot & 1;
- float64 neg_imag, neg_real;
+ bool flip = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint32_t fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
+ uint32_t negf_imag = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ uint32_t negf_real = flip ^ negf_imag;
+ float64 negx_imag, negx_real;
uint64_t *g = vg;
- neg_imag = float64_set_sign(0, (rot & 2) != 0);
- neg_real = float64_set_sign(0, rot == 1 || rot == 2);
+ /* With AH=0, use negx; with AH=1 use negf. */
+ negx_real = (uint64_t)(negf_real & ~fpcr_ah) << 63;
+ negx_imag = (uint64_t)(negf_imag & ~fpcr_ah) << 63;
+ negf_real = (negf_real & fpcr_ah ? float_muladd_negate_product : 0);
+ negf_imag = (negf_imag & fpcr_ah ? float_muladd_negate_product : 0);
do {
uint64_t pg = g[(i - 1) >> 6];
@@ -5460,18 +5475,18 @@ void HELPER(sve_fcmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
mi = *(float64 *)(vm + H1_2(j));
e2 = (flip ? ni : nr);
- e1 = (flip ? mi : mr) ^ neg_real;
+ e1 = (flip ? mi : mr) ^ negx_real;
e4 = e2;
- e3 = (flip ? mr : mi) ^ neg_imag;
+ e3 = (flip ? mr : mi) ^ negx_imag;
if (likely((pg >> (i & 63)) & 1)) {
d = *(float64 *)(va + H1_2(i));
- d = float64_muladd(e2, e1, d, 0, status);
+ d = float64_muladd(e2, e1, d, negf_real, status);
*(float64 *)(vd + H1_2(i)) = d;
}
if (likely((pg >> (j & 63)) & 1)) {
d = *(float64 *)(va + H1_2(j));
- d = float64_muladd(e4, e3, d, 0, status);
+ d = float64_muladd(e4, e3, d, negf_imag, status);
*(float64 *)(vd + H1_2(j)) = d;
}
} while (i & 63);
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index cb6bb27622..6e758fd1ca 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -3964,7 +3964,7 @@ static gen_helper_gvec_5_ptr * const fcmla_fns[4] = {
gen_helper_sve_fcmla_zpzzz_s, gen_helper_sve_fcmla_zpzzz_d,
};
TRANS_FEAT(FCMLA_zpzzz, aa64_sve, gen_gvec_fpst_zzzzp, fcmla_fns[a->esz],
- a->rd, a->rn, a->rm, a->ra, a->pg, a->rot,
+ a->rd, a->rn, a->rm, a->ra, a->pg, a->rot | (s->fpcr_ah << 2),
a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
static gen_helper_gvec_4_ptr * const fcmla_idx_fns[4] = {
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 28/34] target/arm: Split gvec_fmla_idx_* for fmls and ah_fmls
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (26 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 27/34] target/arm: Handle FPCR.AH in sve_fcmla_zpzzz_* Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-31 16:46 ` Peter Maydell
2025-01-29 1:38 ` [PATCH v2 29/34] Revert "target/arm: Handle FPCR.AH in FMLSL" Richard Henderson
` (5 subsequent siblings)
33 siblings, 1 reply; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Split negation cases out of gvec_fmla, creating 6 new helpers.
We no longer pass 'neg' as a bit in simd_data.
Handle FPCR.AH=0 via xor and FPCR.AH=1 via muladd flags.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 14 ++++++++++++++
target/arm/tcg/translate-a64.c | 17 +++++++++++------
target/arm/tcg/translate-sve.c | 31 +++++++++++++++++--------------
target/arm/tcg/vec_helper.c | 29 +++++++++++++++--------------
4 files changed, 57 insertions(+), 34 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index dbad1f5d74..0907505839 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -821,6 +821,20 @@ DEF_HELPER_FLAGS_6(gvec_fmla_idx_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_6(gvec_fmla_idx_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_fmls_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_6(gvec_ah_fmls_idx_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, fpst, i32)
+
DEF_HELPER_FLAGS_5(gvec_uqadd_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(gvec_uqadd_h, TCG_CALL_NO_RWG,
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 9e751e737a..9b1675b041 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -6742,10 +6742,16 @@ TRANS(FMULX_vi, do_fp3_vector_idx, a, f_vector_idx_fmulx)
static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
{
- static gen_helper_gvec_4_ptr * const fns[3] = {
- gen_helper_gvec_fmla_idx_h,
- gen_helper_gvec_fmla_idx_s,
- gen_helper_gvec_fmla_idx_d,
+ static gen_helper_gvec_4_ptr * const fns[3][3] = {
+ { gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_fmla_idx_s,
+ gen_helper_gvec_fmla_idx_d },
+ { gen_helper_gvec_fmls_idx_h,
+ gen_helper_gvec_fmls_idx_s,
+ gen_helper_gvec_fmls_idx_d },
+ { gen_helper_gvec_ah_fmls_idx_h,
+ gen_helper_gvec_ah_fmls_idx_s,
+ gen_helper_gvec_ah_fmls_idx_d },
};
MemOp esz = a->esz;
int check = fp_access_check_vector_hsd(s, a->q, esz);
@@ -6756,8 +6762,7 @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
esz == MO_16 ? FPST_A64_F16 : FPST_A64,
- (s->fpcr_ah << 5) | (a->idx << 1) | neg,
- fns[esz - 1]);
+ a->idx, fns[esz - 1][neg ? 1 + s->fpcr_ah : 0]);
return true;
}
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 6e758fd1ca..dbe66d3b15 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -3524,21 +3524,24 @@ DO_SVE2_RRXR_ROT(CDOT_zzxw_d, gen_helper_sve2_cdot_idx_d)
*** SVE Floating Point Multiply-Add Indexed Group
*/
-static bool do_FMLA_zzxz(DisasContext *s, arg_rrxr_esz *a, bool sub)
-{
- static gen_helper_gvec_4_ptr * const fns[4] = {
- NULL,
- gen_helper_gvec_fmla_idx_h,
- gen_helper_gvec_fmla_idx_s,
- gen_helper_gvec_fmla_idx_d,
- };
- return gen_gvec_fpst_zzzz(s, fns[a->esz], a->rd, a->rn, a->rm, a->ra,
- (s->fpcr_ah << 5) | (a->index << 1) | sub,
- a->esz == MO_16 ? FPST_A64_F16 : FPST_A64);
-}
+static gen_helper_gvec_4_ptr * const fmla_idx_fns[4] = {
+ NULL, gen_helper_gvec_fmla_idx_h,
+ gen_helper_gvec_fmla_idx_s, gen_helper_gvec_fmla_idx_d
+};
+TRANS_FEAT(FMLA_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
+ fmla_idx_fns[a->esz], a->rd, a->rn, a->rm, a->ra, a->index,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
-TRANS_FEAT(FMLA_zzxz, aa64_sve, do_FMLA_zzxz, a, false)
-TRANS_FEAT(FMLS_zzxz, aa64_sve, do_FMLA_zzxz, a, true)
+static gen_helper_gvec_4_ptr * const fmls_idx_fns[4][2] = {
+ { NULL, NULL },
+ { gen_helper_gvec_fmls_idx_h, gen_helper_gvec_ah_fmls_idx_h },
+ { gen_helper_gvec_fmls_idx_s, gen_helper_gvec_ah_fmls_idx_s },
+ { gen_helper_gvec_fmls_idx_d, gen_helper_gvec_ah_fmls_idx_d },
+};
+TRANS_FEAT(FMLS_zzxz, aa64_sve, gen_gvec_fpst_zzzz,
+ fmls_idx_fns[a->esz][s->fpcr_ah],
+ a->rd, a->rn, a->rm, a->ra, a->index,
+ a->esz == MO_16 ? FPST_A64_F16 : FPST_A64)
/*
*** SVE Floating Point Multiply Indexed Group
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 964000773a..728473e919 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -1724,34 +1724,35 @@ DO_FMUL_IDX(gvec_fmls_nf_idx_s, float32_sub, float32_mul, float32, H4)
#undef DO_FMUL_IDX
-#define DO_FMLA_IDX(NAME, TYPE, H) \
+#define DO_FMLA_IDX(NAME, TYPE, H, NEGX, NEGF) \
void HELPER(NAME)(void *vd, void *vn, void *vm, void *va, \
float_status *stat, uint32_t desc) \
{ \
intptr_t i, j, oprsz = simd_oprsz(desc); \
intptr_t segment = MIN(16, oprsz) / sizeof(TYPE); \
- TYPE op1_neg = extract32(desc, SIMD_DATA_SHIFT, 1); \
- intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 1, 3); \
- bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 5, 1); \
+ intptr_t idx = simd_data(desc); \
TYPE *d = vd, *n = vn, *m = vm, *a = va; \
- op1_neg <<= (8 * sizeof(TYPE) - 1); \
for (i = 0; i < oprsz / sizeof(TYPE); i += segment) { \
TYPE mm = m[H(i + idx)]; \
for (j = 0; j < segment; j++) { \
- TYPE nval = n[i + j]; \
- if (!(fpcr_ah && TYPE ## _is_any_nan(nval))) { \
- nval ^= op1_neg; \
- } \
- d[i + j] = TYPE##_muladd(nval, \
- mm, a[i + j], 0, stat); \
+ d[i + j] = TYPE##_muladd(n[i + j] ^ NEGX, mm, \
+ a[i + j], NEGF, stat); \
} \
} \
clear_tail(d, oprsz, simd_maxsz(desc)); \
}
-DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2)
-DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4)
-DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8)
+DO_FMLA_IDX(gvec_fmla_idx_h, float16, H2, 0, 0)
+DO_FMLA_IDX(gvec_fmla_idx_s, float32, H4, 0, 0)
+DO_FMLA_IDX(gvec_fmla_idx_d, float64, H8, 0, 0)
+
+DO_FMLA_IDX(gvec_fmls_idx_h, float16, H2, INT16_MIN, 0)
+DO_FMLA_IDX(gvec_fmls_idx_s, float32, H4, INT32_MIN, 0)
+DO_FMLA_IDX(gvec_fmls_idx_d, float64, H8, INT64_MIN, 0)
+
+DO_FMLA_IDX(gvec_ah_fmls_idx_h, float16, H2, 0, float_muladd_negate_product)
+DO_FMLA_IDX(gvec_ah_fmls_idx_s, float32, H4, 0, float_muladd_negate_product)
+DO_FMLA_IDX(gvec_ah_fmls_idx_d, float64, H8, 0, float_muladd_negate_product)
#undef DO_FMLA_IDX
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* Re: [PATCH v2 28/34] target/arm: Split gvec_fmla_idx_* for fmls and ah_fmls
2025-01-29 1:38 ` [PATCH v2 28/34] target/arm: Split gvec_fmla_idx_* for fmls and ah_fmls Richard Henderson
@ 2025-01-31 16:46 ` Peter Maydell
0 siblings, 0 replies; 52+ messages in thread
From: Peter Maydell @ 2025-01-31 16:46 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel
On Wed, 29 Jan 2025 at 01:39, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Split negation cases out of gvec_fmla, creating 6 new helpers.
> We no longer pass 'neg' as a bit in simd_data.
>
> Handle FPCR.AH=0 via xor and FPCR.AH=1 via muladd flags.
> static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
> {
> - static gen_helper_gvec_4_ptr * const fns[3] = {
> - gen_helper_gvec_fmla_idx_h,
> - gen_helper_gvec_fmla_idx_s,
> - gen_helper_gvec_fmla_idx_d,
> + static gen_helper_gvec_4_ptr * const fns[3][3] = {
> + { gen_helper_gvec_fmla_idx_h,
> + gen_helper_gvec_fmla_idx_s,
> + gen_helper_gvec_fmla_idx_d },
> + { gen_helper_gvec_fmls_idx_h,
> + gen_helper_gvec_fmls_idx_s,
> + gen_helper_gvec_fmls_idx_d },
> + { gen_helper_gvec_ah_fmls_idx_h,
> + gen_helper_gvec_ah_fmls_idx_s,
> + gen_helper_gvec_ah_fmls_idx_d },
> };
> MemOp esz = a->esz;
> int check = fp_access_check_vector_hsd(s, a->q, esz);
> @@ -6756,8 +6762,7 @@ static bool do_fmla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool neg)
>
> gen_gvec_op4_fpst(s, a->q, a->rd, a->rn, a->rm, a->rd,
> esz == MO_16 ? FPST_A64_F16 : FPST_A64,
> - (s->fpcr_ah << 5) | (a->idx << 1) | neg,
> - fns[esz - 1]);
> + a->idx, fns[esz - 1][neg ? 1 + s->fpcr_ah : 0]);
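The indexes into fns[][] here are the wrong way around, so
if you try to do a FMLA on a double it hands you back
gen_helper_gvec_ah_fmls_idx_h ...
The table is declared with the operation (fmla, fmls, ah_fmls) as the
first dimension and the element size as the second, so the lookup
should be fns[neg ? 1 + s->fpcr_ah : 0][esz - 1].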
thanks
-- PMM
^ permalink raw reply [flat|nested] 52+ messages in thread
* [PATCH v2 29/34] Revert "target/arm: Handle FPCR.AH in FMLSL"
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (27 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 28/34] target/arm: Split gvec_fmla_idx_* for fmls and ah_fmls Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 30/34] target/arm: Handle FPCR.AH in gvec_fmlal_a64 Richard Henderson
` (4 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
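This reverts commit c5eb0b62e603c1d391ee2199108f0eb34aadc8f5.
The FPCR.AH handling for FMLSL is reintroduced by the following patches,
which negate the product via float_muladd_negate_product instead of
testing each input for NaN before flipping its sign bit.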
---
target/arm/tcg/translate-a64.c | 4 ++--
target/arm/tcg/vec_helper.c | 28 ++++------------------------
2 files changed, 6 insertions(+), 26 deletions(-)
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index 9b1675b041..d27b4e964d 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -5997,7 +5997,7 @@ TRANS(FMINNMP_v, do_fp3_vector, a, 0, f_vector_fminnmp)
static bool do_fmlal(DisasContext *s, arg_qrrr_e *a, bool is_s, bool is_2)
{
if (fp_access_check(s)) {
- int data = (s->fpcr_ah << 2) | (is_2 << 1) | is_s;
+ int data = (is_2 << 1) | is_s;
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
vec_full_reg_offset(s, a->rn),
vec_full_reg_offset(s, a->rm), tcg_env,
@@ -6772,7 +6772,7 @@ TRANS(FMLS_vi, do_fmla_vector_idx, a, true)
static bool do_fmlal_idx(DisasContext *s, arg_qrrx_e *a, bool is_s, bool is_2)
{
if (fp_access_check(s)) {
- int data = (s->fpcr_ah << 5) | (a->idx << 2) | (is_2 << 1) | is_s;
+ int data = (a->idx << 2) | (is_2 << 1) | is_s;
tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
vec_full_reg_offset(s, a->rn),
vec_full_reg_offset(s, a->rm), tcg_env,
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 728473e919..b3ed6533bb 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2120,26 +2120,6 @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
return ptr[is_q & is_2] >> ((is_2 & ~is_q) << 5);
}
-static uint64_t neg4_f16(uint64_t v, bool fpcr_ah)
-{
- /*
- * Negate all inputs for FMLSL at once. This is slightly complicated
- * by the need to avoid flipping the sign of a NaN when FPCR.AH == 1
- */
- uint64_t mask = 0x8000800080008000ull;
- if (fpcr_ah) {
- uint64_t tmp = v, signbit = 0x8000;
- for (int i = 0; i < 4; i++) {
- if (float16_is_any_nan(extract64(tmp, 0, 16))) {
- mask ^= signbit;
- }
- tmp >>= 16;
- signbit <<= 16;
- }
- }
- return v ^ mask;
-}
-
/*
* Note that FMLAL requires oprsz == 8 or oprsz == 16,
* as there is not yet SVE versions that might use blocking.
@@ -2151,7 +2131,6 @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
intptr_t i, oprsz = simd_oprsz(desc);
int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
- bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 2, 1);
int is_q = oprsz == 16;
uint64_t n_4, m_4;
@@ -2159,8 +2138,9 @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
n_4 = load4_f16(vn, is_q, is_2);
m_4 = load4_f16(vm, is_q, is_2);
+ /* Negate all inputs for FMLSL at once. */
if (is_s) {
- n_4 = neg4_f16(n_4, fpcr_ah);
+ n_4 ^= 0x8000800080008000ull;
}
for (i = 0; i < oprsz / 4; i++) {
@@ -2212,7 +2192,6 @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
- bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 5, 1);
int is_q = oprsz == 16;
uint64_t n_4;
float32 m_1;
@@ -2220,8 +2199,9 @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
/* Pre-load all of the f16 data, avoiding overlap issues. */
n_4 = load4_f16(vn, is_q, is_2);
+ /* Negate all inputs for FMLSL at once. */
if (is_s) {
- n_4 = neg4_f16(n_4, fpcr_ah);
+ n_4 ^= 0x8000800080008000ull;
}
m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16);
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 30/34] target/arm: Handle FPCR.AH in gvec_fmlal_a64
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (28 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 29/34] Revert "target/arm: Handle FPCR.AH in FMLSL" Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 31/34] target/arm: Handle FPCR.AH in sve2_fmlal_zzxw_s Richard Henderson
` (3 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/vec_helper.c | 71 ++++++++++++++++++++++++-------------
1 file changed, 46 insertions(+), 25 deletions(-)
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index b3ed6533bb..9b14885ef2 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2126,27 +2126,24 @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
*/
static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
- uint32_t desc, bool fz16)
+ uint64_t negx, int negf, uint32_t desc, bool fz16)
{
intptr_t i, oprsz = simd_oprsz(desc);
- int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
int is_q = oprsz == 16;
uint64_t n_4, m_4;
- /* Pre-load all of the f16 data, avoiding overlap issues. */
- n_4 = load4_f16(vn, is_q, is_2);
+ /*
+ * Pre-load all of the f16 data, avoiding overlap issues.
+ * Negate all inputs for AH=0 FMLSL at once.
+ */
+ n_4 = load4_f16(vn, is_q, is_2) ^ negx;
m_4 = load4_f16(vm, is_q, is_2);
- /* Negate all inputs for FMLSL at once. */
- if (is_s) {
- n_4 ^= 0x8000800080008000ull;
- }
-
for (i = 0; i < oprsz / 4; i++) {
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
float32 m_1 = float16_to_float32_by_bits(m_4 >> (i * 16), fz16);
- d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst);
}
clear_tail(d, oprsz, simd_maxsz(desc));
}
@@ -2154,14 +2151,28 @@ static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
- do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc,
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint64_t negx = is_s ? 0x8000800080008000ull : 0;
+
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
}
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
- do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], desc,
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint64_t negx = 0;
+ int negf = 0;
+
+ if (is_s) {
+ if (env->vfp.fpcr & FPCR_AH) {
+ negf = float_muladd_negate_product;
+ } else {
+ negx = 0x8000800080008000ull;
+ }
+ }
+ do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
}
@@ -2186,29 +2197,25 @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
}
static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
- uint32_t desc, bool fz16)
+ uint64_t negx, int negf, uint32_t desc, bool fz16)
{
intptr_t i, oprsz = simd_oprsz(desc);
- int is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
int is_q = oprsz == 16;
uint64_t n_4;
float32 m_1;
- /* Pre-load all of the f16 data, avoiding overlap issues. */
- n_4 = load4_f16(vn, is_q, is_2);
-
- /* Negate all inputs for FMLSL at once. */
- if (is_s) {
- n_4 ^= 0x8000800080008000ull;
- }
-
+ /*
+ * Pre-load all of the f16 data, avoiding overlap issues.
+ * Negate all inputs for AH=0 FMLSL at once.
+ */
+ n_4 = load4_f16(vn, is_q, is_2) ^ negx;
m_1 = float16_to_float32_by_bits(((float16 *)vm)[H2(index)], fz16);
for (i = 0; i < oprsz / 4; i++) {
float32 n_1 = float16_to_float32_by_bits(n_4 >> (i * 16), fz16);
- d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], 0, fpst);
+ d[H4(i)] = float32_muladd(n_1, m_1, d[H4(i)], negf, fpst);
}
clear_tail(d, oprsz, simd_maxsz(desc));
}
@@ -2216,14 +2223,28 @@ static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], desc,
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint64_t negx = is_s ? 0x8000800080008000ull : 0;
+
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
}
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
CPUARMState *env, uint32_t desc)
{
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], desc,
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
+ uint64_t negx = 0;
+ int negf = 0;
+
+ if (is_s) {
+ if (env->vfp.fpcr & FPCR_AH) {
+ negf = float_muladd_negate_product;
+ } else {
+ negx = 0x8000800080008000ull;
+ }
+ }
+ do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
}
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 31/34] target/arm: Handle FPCR.AH in sve2_fmlal_zzxw_s
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (29 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 30/34] target/arm: Handle FPCR.AH in gvec_fmlal_a64 Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 32/34] target/arm: Handle FPCR.AH in sve2_fmlal_zzzw_s Richard Henderson
` (2 subsequent siblings)
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/vec_helper.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 9b14885ef2..c716bd774a 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2252,23 +2252,32 @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
CPUARMState *env, uint32_t desc)
{
intptr_t i, j, oprsz = simd_oprsz(desc);
- uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
float_status *status = &env->vfp.fp_status[FPST_A64];
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
+ int negx = 0, negf = 0;
+
+ if (is_s) {
+ if (env->vfp.fpcr & FPCR_AH) {
+ negf = float_muladd_negate_product;
+ } else {
+ negx = 0x8000;
+ }
+ }
for (i = 0; i < oprsz; i += 16) {
float16 mm_16 = *(float16 *)(vm + i + idx);
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
for (j = 0; j < 16; j += sizeof(float32)) {
- float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negn;
+ float16 nn_16 = *(float16 *)(vn + H1_2(i + j + sel)) ^ negx;
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
float32 aa = *(float32 *)(va + H1_4(i + j));
*(float32 *)(vd + H1_4(i + j)) =
- float32_muladd(nn, mm, aa, 0, status);
+ float32_muladd(nn, mm, aa, negf, status);
}
}
}
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 32/34] target/arm: Handle FPCR.AH in sve2_fmlal_zzzw_s
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (30 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 31/34] target/arm: Handle FPCR.AH in sve2_fmlal_zzxw_s Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 33/34] target/arm: Read fz16 from env->vfp.fpcr Richard Henderson
2025-01-29 1:38 ` [PATCH v2 34/34] target/arm: Sink fp_status and fpcr access into do_fmlal* Richard Henderson
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/vec_helper.c | 15 ++++++++++++---
1 file changed, 12 insertions(+), 3 deletions(-)
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index c716bd774a..bae98a34b8 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2180,19 +2180,28 @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
CPUARMState *env, uint32_t desc)
{
intptr_t i, oprsz = simd_oprsz(desc);
- uint16_t negn = extract32(desc, SIMD_DATA_SHIFT, 1) << 15;
+ bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
float_status *status = &env->vfp.fp_status[FPST_A64];
bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
+ int negx = 0, negf = 0;
+
+ if (is_s) {
+ if (env->vfp.fpcr & FPCR_AH) {
+ negf = float_muladd_negate_product;
+ } else {
+ negx = 0x8000;
+ }
+ }
for (i = 0; i < oprsz; i += sizeof(float32)) {
- float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negn;
+ float16 nn_16 = *(float16 *)(vn + H1_2(i + sel)) ^ negx;
float16 mm_16 = *(float16 *)(vm + H1_2(i + sel));
float32 nn = float16_to_float32_by_bits(nn_16, fz16);
float32 mm = float16_to_float32_by_bits(mm_16, fz16);
float32 aa = *(float32 *)(va + H1_4(i));
- *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, 0, status);
+ *(float32 *)(vd + H1_4(i)) = float32_muladd(nn, mm, aa, negf, status);
}
}
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 33/34] target/arm: Read fz16 from env->vfp.fpcr
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (31 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 32/34] target/arm: Handle FPCR.AH in sve2_fmlal_zzzw_s Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
2025-01-29 1:38 ` [PATCH v2 34/34] target/arm: Sink fp_status and fpcr access into do_fmlal* Richard Henderson
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Read the bit from the source, rather than from the proxy via
get_flush_inputs_to_zero. This makes it clear that it does
not matter which of the float_status structures is used.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/vec_helper.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index bae98a34b8..03b0a6ebed 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2155,7 +2155,7 @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
+ env->vfp.fpcr & FPCR_FZ16);
}
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
@@ -2173,7 +2173,7 @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
}
}
do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
+ env->vfp.fpcr & FPCR_FZ16);
}
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
@@ -2183,7 +2183,7 @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
float_status *status = &env->vfp.fp_status[FPST_A64];
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
int negx = 0, negf = 0;
if (is_s) {
@@ -2236,7 +2236,7 @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A32_F16]));
+ env->vfp.fpcr & FPCR_FZ16);
}
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
@@ -2254,7 +2254,7 @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
}
}
do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
- get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]));
+ env->vfp.fpcr & FPCR_FZ16);
}
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
@@ -2265,7 +2265,7 @@ void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
intptr_t sel = extract32(desc, SIMD_DATA_SHIFT + 1, 1) * sizeof(float16);
intptr_t idx = extract32(desc, SIMD_DATA_SHIFT + 2, 3) * sizeof(float16);
float_status *status = &env->vfp.fp_status[FPST_A64];
- bool fz16 = get_flush_inputs_to_zero(&env->vfp.fp_status[FPST_A64_F16]);
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
int negx = 0, negf = 0;
if (is_s) {
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread
* [PATCH v2 34/34] target/arm: Sink fp_status and fpcr access into do_fmlal*
2025-01-29 1:38 [PATCH v2 00/34] target/arm: FEAT_AFP followups for FEAT_SME2 Richard Henderson
` (32 preceding siblings ...)
2025-01-29 1:38 ` [PATCH v2 33/34] target/arm: Read fz16 from env->vfp.fpcr Richard Henderson
@ 2025-01-29 1:38 ` Richard Henderson
33 siblings, 0 replies; 52+ messages in thread
From: Richard Henderson @ 2025-01-29 1:38 UTC (permalink / raw)
To: qemu-devel; +Cc: peter.maydell
Sink common code from the callers into do_fmlal
and do_fmlal_idx. Reorder the arguments to minimize
the re-sorting from the caller's arguments.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/vec_helper.c | 28 ++++++++++++++++------------
1 file changed, 16 insertions(+), 12 deletions(-)
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index 03b0a6ebed..25ef7af029 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -2125,9 +2125,13 @@ static uint64_t load4_f16(uint64_t *ptr, int is_q, int is_2)
* as there is not yet SVE versions that might use blocking.
*/
-static void do_fmlal(float32 *d, void *vn, void *vm, float_status *fpst,
- uint64_t negx, int negf, uint32_t desc, bool fz16)
+static void do_fmlal(float32 *d, void *vn, void *vm,
+ CPUARMState *env, uint32_t desc,
+ ARMFPStatusFlavour fpst_idx,
+ uint64_t negx, int negf)
{
+ float_status *fpst = &env->vfp.fp_status[fpst_idx];
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
intptr_t i, oprsz = simd_oprsz(desc);
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
int is_q = oprsz == 16;
@@ -2154,8 +2158,7 @@ void HELPER(gvec_fmlal_a32)(void *vd, void *vn, void *vm,
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
- do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
- env->vfp.fpcr & FPCR_FZ16);
+ do_fmlal(vd, vn, vm, env, desc, FPST_STD, negx, 0);
}
void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
@@ -2172,8 +2175,7 @@ void HELPER(gvec_fmlal_a64)(void *vd, void *vn, void *vm,
negx = 0x8000800080008000ull;
}
}
- do_fmlal(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
- env->vfp.fpcr & FPCR_FZ16);
+ do_fmlal(vd, vn, vm, env, desc, FPST_A64, negx, negf);
}
void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
@@ -2205,9 +2207,13 @@ void HELPER(sve2_fmlal_zzzw_s)(void *vd, void *vn, void *vm, void *va,
}
}
-static void do_fmlal_idx(float32 *d, void *vn, void *vm, float_status *fpst,
- uint64_t negx, int negf, uint32_t desc, bool fz16)
+static void do_fmlal_idx(float32 *d, void *vn, void *vm,
+ CPUARMState *env, uint32_t desc,
+ ARMFPStatusFlavour fpst_idx,
+ uint64_t negx, int negf)
{
+ float_status *fpst = &env->vfp.fp_status[fpst_idx];
+ bool fz16 = env->vfp.fpcr & FPCR_FZ16;
intptr_t i, oprsz = simd_oprsz(desc);
int is_2 = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
int index = extract32(desc, SIMD_DATA_SHIFT + 2, 3);
@@ -2235,8 +2241,7 @@ void HELPER(gvec_fmlal_idx_a32)(void *vd, void *vn, void *vm,
bool is_s = extract32(desc, SIMD_DATA_SHIFT, 1);
uint64_t negx = is_s ? 0x8000800080008000ull : 0;
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_STD], negx, 0, desc,
- env->vfp.fpcr & FPCR_FZ16);
+ do_fmlal_idx(vd, vn, vm, env, desc, FPST_STD, negx, 0);
}
void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
@@ -2253,8 +2258,7 @@ void HELPER(gvec_fmlal_idx_a64)(void *vd, void *vn, void *vm,
negx = 0x8000800080008000ull;
}
}
- do_fmlal_idx(vd, vn, vm, &env->vfp.fp_status[FPST_A64], negx, negf, desc,
- env->vfp.fpcr & FPCR_FZ16);
+ do_fmlal_idx(vd, vn, vm, env, desc, FPST_A64, negx, negf);
}
void HELPER(sve2_fmlal_zzxw_s)(void *vd, void *vn, void *vm, void *va,
--
2.43.0
^ permalink raw reply related [flat|nested] 52+ messages in thread