* [PATCH 01/17] softfloat: Add float{16,32,64}_muladd_scalbn
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
We currently have a flag, float_muladd_halve_result, to scale
the result by 2**-1. Extend this to handle arbitrary scaling.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat.h | 6 ++++
fpu/softfloat.c | 58 ++++++++++++++++++++++-----------------
fpu/softfloat-parts.c.inc | 7 +++--
3 files changed, 44 insertions(+), 27 deletions(-)
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index eb64075b9c..c34ce0477d 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -238,6 +238,8 @@ float16 float16_add(float16, float16, float_status *status);
float16 float16_sub(float16, float16, float_status *status);
float16 float16_mul(float16, float16, float_status *status);
float16 float16_muladd(float16, float16, float16, int, float_status *status);
+float16 float16_muladd_scalbn(float16, float16, float16,
+ int, int, float_status *status);
float16 float16_div(float16, float16, float_status *status);
float16 float16_scalbn(float16, int, float_status *status);
float16 float16_min(float16, float16, float_status *status);
@@ -597,6 +599,8 @@ float32 float32_mul(float32, float32, float_status *status);
float32 float32_div(float32, float32, float_status *status);
float32 float32_rem(float32, float32, float_status *status);
float32 float32_muladd(float32, float32, float32, int, float_status *status);
+float32 float32_muladd_scalbn(float32, float32, float32,
+ int, int, float_status *status);
float32 float32_sqrt(float32, float_status *status);
float32 float32_exp2(float32, float_status *status);
float32 float32_log2(float32, float_status *status);
@@ -792,6 +796,8 @@ float64 float64_mul(float64, float64, float_status *status);
float64 float64_div(float64, float64, float_status *status);
float64 float64_rem(float64, float64, float_status *status);
float64 float64_muladd(float64, float64, float64, int, float_status *status);
+float64 float64_muladd_scalbn(float64, float64, float64,
+ int, int, float_status *status);
float64 float64_sqrt(float64, float_status *status);
float64 float64_log2(float64, float_status *status);
FloatRelation float64_compare(float64, float64, float_status *status);
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 027a8e576d..a4174de692 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -789,15 +789,15 @@ static FloatParts128 *parts128_mul(FloatParts128 *a, FloatParts128 *b,
#define parts_mul(A, B, S) \
PARTS_GENERIC_64_128(mul, A)(A, B, S)
-static FloatParts64 *parts64_muladd(FloatParts64 *a, FloatParts64 *b,
- FloatParts64 *c, int flags,
- float_status *s);
-static FloatParts128 *parts128_muladd(FloatParts128 *a, FloatParts128 *b,
- FloatParts128 *c, int flags,
- float_status *s);
+static FloatParts64 *parts64_muladd_scalbn(FloatParts64 *a, FloatParts64 *b,
+ FloatParts64 *c, int scale,
+ int flags, float_status *s);
+static FloatParts128 *parts128_muladd_scalbn(FloatParts128 *a, FloatParts128 *b,
+ FloatParts128 *c, int scale,
+ int flags, float_status *s);
-#define parts_muladd(A, B, C, Z, S) \
- PARTS_GENERIC_64_128(muladd, A)(A, B, C, Z, S)
+#define parts_muladd_scalbn(A, B, C, Z, Y, S) \
+ PARTS_GENERIC_64_128(muladd_scalbn, A)(A, B, C, Z, Y, S)
static FloatParts64 *parts64_div(FloatParts64 *a, FloatParts64 *b,
float_status *s);
@@ -2212,43 +2212,50 @@ floatx80_mul(floatx80 a, floatx80 b, float_status *status)
* Fused multiply-add
*/
-float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
- int flags, float_status *status)
+float16 QEMU_FLATTEN
+float16_muladd_scalbn(float16 a, float16 b, float16 c,
+ int scale, int flags, float_status *status)
{
FloatParts64 pa, pb, pc, *pr;
float16_unpack_canonical(&pa, a, status);
float16_unpack_canonical(&pb, b, status);
float16_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
return float16_round_pack_canonical(pr, status);
}
-static float32 QEMU_SOFTFLOAT_ATTR
-soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
- float_status *status)
+float16 float16_muladd(float16 a, float16 b, float16 c,
+ int flags, float_status *status)
+{
+ return float16_muladd_scalbn(a, b, c, 0, flags, status);
+}
+
+float32 QEMU_SOFTFLOAT_ATTR
+float32_muladd_scalbn(float32 a, float32 b, float32 c,
+ int scale, int flags, float_status *status)
{
FloatParts64 pa, pb, pc, *pr;
float32_unpack_canonical(&pa, a, status);
float32_unpack_canonical(&pb, b, status);
float32_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
return float32_round_pack_canonical(pr, status);
}
-static float64 QEMU_SOFTFLOAT_ATTR
-soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
- float_status *status)
+float64 QEMU_SOFTFLOAT_ATTR
+float64_muladd_scalbn(float64 a, float64 b, float64 c,
+ int scale, int flags, float_status *status)
{
FloatParts64 pa, pb, pc, *pr;
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
float64_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, scale, flags, status);
return float64_round_pack_canonical(pr, status);
}
@@ -2323,7 +2330,7 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
return ur.s;
soft:
- return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
+ return float32_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}
float64 QEMU_FLATTEN
@@ -2394,7 +2401,7 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
return ur.s;
soft:
- return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
+ return float64_muladd_scalbn(ua.s, ub.s, uc.s, 0, flags, s);
}
float64 float64r32_muladd(float64 a, float64 b, float64 c,
@@ -2405,7 +2412,7 @@ float64 float64r32_muladd(float64 a, float64 b, float64 c,
float64_unpack_canonical(&pa, a, status);
float64_unpack_canonical(&pb, b, status);
float64_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
return float64r32_round_pack_canonical(pr, status);
}
@@ -2418,7 +2425,7 @@ bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
bfloat16_unpack_canonical(&pa, a, status);
bfloat16_unpack_canonical(&pb, b, status);
bfloat16_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
return bfloat16_round_pack_canonical(pr, status);
}
@@ -2431,7 +2438,7 @@ float128 QEMU_FLATTEN float128_muladd(float128 a, float128 b, float128 c,
float128_unpack_canonical(&pa, a, status);
float128_unpack_canonical(&pb, b, status);
float128_unpack_canonical(&pc, c, status);
- pr = parts_muladd(&pa, &pb, &pc, flags, status);
+ pr = parts_muladd_scalbn(&pa, &pb, &pc, 0, flags, status);
return float128_round_pack_canonical(pr, status);
}
@@ -5230,8 +5237,9 @@ float32 float32_exp2(float32 a, float_status *status)
float64_unpack_canonical(&rp, float64_one, status);
for (i = 0 ; i < 15 ; i++) {
+
float64_unpack_canonical(&tp, float32_exp2_coefficients[i], status);
- rp = *parts_muladd(&tp, &xnp, &rp, 0, status);
+ rp = *parts_muladd_scalbn(&tp, &xnp, &rp, 0, 0, status);
xnp = *parts_mul(&xnp, &xp, status);
}
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index cc6e06b976..5133358878 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -476,8 +476,9 @@ static FloatPartsN *partsN(mul)(FloatPartsN *a, FloatPartsN *b,
* Requires A and C extracted into a double-sized structure to provide the
* extra space for the widening multiply.
*/
-static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
- FloatPartsN *c, int flags, float_status *s)
+static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
+ FloatPartsN *c, int scale,
+ int flags, float_status *s)
{
int ab_mask, abc_mask;
FloatPartsW p_widen, c_widen;
@@ -566,9 +567,11 @@ static FloatPartsN *partsN(muladd)(FloatPartsN *a, FloatPartsN *b,
a->exp = p_widen.exp;
return_normal:
+ /* TODO: Replace all use of float_muladd_halve_result with scale. */
if (flags & float_muladd_halve_result) {
a->exp -= 1;
}
+ a->exp += scale;
finish_sign:
if (flags & float_muladd_negate_result) {
a->sign ^= 1;
--
2.43.0
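For illustration, a minimal usage sketch of the new interface (not part of the patch; it assumes QEMU's fpu/softfloat.h as modified above). The scale argument multiplies the fused result by 2**scale before the single rounding step, so a scale of -1 reproduces float_muladd_halve_result:

    #include "qemu/osdep.h"
    #include "fpu/softfloat.h"

    /* (a * b + c) * 2**scale, rounded once. */
    static float64 fma_scaled(float64 a, float64 b, float64 c,
                              int scale, float_status *st)
    {
        return float64_muladd_scalbn(a, b, c, scale, 0, st);
    }

    /* scale = -1 halves the result, as float_muladd_halve_result did. */
    static float64 fma_halved(float64 a, float64 b, float64 c, float_status *st)
    {
        return float64_muladd_scalbn(a, b, c, -1, 0, st);
    }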
* Re: [PATCH 01/17] softfloat: Add float{16,32,64}_muladd_scalbn
From: Philippe Mathieu-Daudé @ 2024-12-09 16:06 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
On 8/12/24 23:48, Richard Henderson wrote:
> We currently have a flag, float_muladd_halve_result, to scale
> the result by 2**-1. Extend this to handle arbitrary scaling.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> include/fpu/softfloat.h | 6 ++++
> fpu/softfloat.c | 58 ++++++++++++++++++++++-----------------
> fpu/softfloat-parts.c.inc | 7 +++--
> 3 files changed, 44 insertions(+), 27 deletions(-)
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
* [PATCH 02/17] target/arm: Use float*_muladd_scalbn
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
Use the scalbn interface instead of float_muladd_halve_result.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/helper-a64.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c
index 8f42a28d07..265a065f6f 100644
--- a/target/arm/tcg/helper-a64.c
+++ b/target/arm/tcg/helper-a64.c
@@ -273,7 +273,7 @@ uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
(float16_is_infinity(b) && float16_is_zero(a))) {
return float16_one_point_five;
}
- return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
+ return float16_muladd_scalbn(a, b, float16_three, -1, 0, fpst);
}
float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
@@ -288,7 +288,7 @@ float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
(float32_is_infinity(b) && float32_is_zero(a))) {
return float32_one_point_five;
}
- return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
+ return float32_muladd_scalbn(a, b, float32_three, -1, 0, fpst);
}
float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
@@ -303,7 +303,7 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
(float64_is_infinity(b) && float64_is_zero(a))) {
return float64_one_point_five;
}
- return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
+ return float64_muladd_scalbn(a, b, float64_three, -1, 0, fpst);
}
/* Pairwise long add: add pairs of adjacent elements into
--
2.43.0
* [PATCH 03/17] target/sparc: Use float*_muladd_scalbn
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
Use the scalbn interface instead of float_muladd_halve_result.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/sparc/helper.h | 4 +-
target/sparc/fop_helper.c | 8 ++--
target/sparc/translate.c | 80 +++++++++++++++++++++++----------------
3 files changed, 54 insertions(+), 38 deletions(-)
diff --git a/target/sparc/helper.h b/target/sparc/helper.h
index 134e519a37..49ace89858 100644
--- a/target/sparc/helper.h
+++ b/target/sparc/helper.h
@@ -59,7 +59,7 @@ DEF_HELPER_FLAGS_3(faddd, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fsubd, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fdivd, TCG_CALL_NO_WG, f64, env, f64, f64)
-DEF_HELPER_FLAGS_5(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, i32)
+DEF_HELPER_FLAGS_6(fmaddd, TCG_CALL_NO_WG, f64, env, f64, f64, f64, s32, i32)
DEF_HELPER_FLAGS_3(fnaddd, TCG_CALL_NO_WG, f64, env, f64, f64)
DEF_HELPER_FLAGS_3(fnmuld, TCG_CALL_NO_WG, f64, env, f64, f64)
@@ -72,7 +72,7 @@ DEF_HELPER_FLAGS_3(fadds, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fsubs, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fdivs, TCG_CALL_NO_WG, f32, env, f32, f32)
-DEF_HELPER_FLAGS_5(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, i32)
+DEF_HELPER_FLAGS_6(fmadds, TCG_CALL_NO_WG, f32, env, f32, f32, f32, s32, i32)
DEF_HELPER_FLAGS_3(fnadds, TCG_CALL_NO_WG, f32, env, f32, f32)
DEF_HELPER_FLAGS_3(fnmuls, TCG_CALL_NO_WG, f32, env, f32, f32)
diff --git a/target/sparc/fop_helper.c b/target/sparc/fop_helper.c
index 6f9ccc008a..f4af04f061 100644
--- a/target/sparc/fop_helper.c
+++ b/target/sparc/fop_helper.c
@@ -344,17 +344,17 @@ Int128 helper_fsqrtq(CPUSPARCState *env, Int128 src)
}
float32 helper_fmadds(CPUSPARCState *env, float32 s1,
- float32 s2, float32 s3, uint32_t op)
+ float32 s2, float32 s3, int32_t sc, uint32_t op)
{
- float32 ret = float32_muladd(s1, s2, s3, op, &env->fp_status);
+ float32 ret = float32_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
check_ieee_exceptions(env, GETPC());
return ret;
}
float64 helper_fmaddd(CPUSPARCState *env, float64 s1,
- float64 s2, float64 s3, uint32_t op)
+ float64 s2, float64 s3, int32_t sc, uint32_t op)
{
- float64 ret = float64_muladd(s1, s2, s3, op, &env->fp_status);
+ float64 ret = float64_muladd_scalbn(s1, s2, s3, sc, op, &env->fp_status);
check_ieee_exceptions(env, GETPC());
return ret;
}
diff --git a/target/sparc/translate.c b/target/sparc/translate.c
index cdd0a95c03..005efb13f1 100644
--- a/target/sparc/translate.c
+++ b/target/sparc/translate.c
@@ -1364,93 +1364,109 @@ static void gen_op_fabsq(TCGv_i128 dst, TCGv_i128 src)
static void gen_op_fmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
+ TCGv_i32 z = tcg_constant_i32(0);
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, z);
}
static void gen_op_fmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(0));
+ TCGv_i32 z = tcg_constant_i32(0);
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, z);
}
static void gen_op_fmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
- int op = float_muladd_negate_c;
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
}
static void gen_op_fmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
- int op = float_muladd_negate_c;
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
}
static void gen_op_fnmsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
- int op = float_muladd_negate_c | float_muladd_negate_result;
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
+ float_muladd_negate_result);
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
}
static void gen_op_fnmsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
- int op = float_muladd_negate_c | float_muladd_negate_result;
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c |
+ float_muladd_negate_result);
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
}
static void gen_op_fnmadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2, TCGv_i32 s3)
{
- int op = float_muladd_negate_result;
- gen_helper_fmadds(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+ gen_helper_fmadds(d, tcg_env, s1, s2, s3, z, op);
}
static void gen_op_fnmaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2, TCGv_i64 s3)
{
- int op = float_muladd_negate_result;
- gen_helper_fmaddd(d, tcg_env, s1, s2, s3, tcg_constant_i32(op));
+ TCGv_i32 z = tcg_constant_i32(0);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+ gen_helper_fmaddd(d, tcg_env, s1, s2, s3, z, op);
}
/* Use muladd to compute (1 * src1) + src2 / 2 with one rounding. */
static void gen_op_fhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
{
- TCGv_i32 one = tcg_constant_i32(float32_one);
- int op = float_muladd_halve_result;
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(0);
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
}
static void gen_op_fhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
{
- TCGv_i64 one = tcg_constant_i64(float64_one);
- int op = float_muladd_halve_result;
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(0);
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
}
/* Use muladd to compute (1 * src1) - src2 / 2 with one rounding. */
static void gen_op_fhsubs(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
{
- TCGv_i32 one = tcg_constant_i32(float32_one);
- int op = float_muladd_negate_c | float_muladd_halve_result;
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
}
static void gen_op_fhsubd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
{
- TCGv_i64 one = tcg_constant_i64(float64_one);
- int op = float_muladd_negate_c | float_muladd_halve_result;
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_c);
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
}
/* Use muladd to compute -((1 * src1) + src2 / 2) with one rounding. */
static void gen_op_fnhadds(TCGv_i32 d, TCGv_i32 s1, TCGv_i32 s2)
{
- TCGv_i32 one = tcg_constant_i32(float32_one);
- int op = float_muladd_negate_result | float_muladd_halve_result;
- gen_helper_fmadds(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i32 fone = tcg_constant_i32(float32_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+ gen_helper_fmadds(d, tcg_env, fone, s1, s2, mone, op);
}
static void gen_op_fnhaddd(TCGv_i64 d, TCGv_i64 s1, TCGv_i64 s2)
{
- TCGv_i64 one = tcg_constant_i64(float64_one);
- int op = float_muladd_negate_result | float_muladd_halve_result;
- gen_helper_fmaddd(d, tcg_env, one, s1, s2, tcg_constant_i32(op));
+ TCGv_i64 fone = tcg_constant_i64(float64_one);
+ TCGv_i32 mone = tcg_constant_i32(-1);
+ TCGv_i32 op = tcg_constant_i32(float_muladd_negate_result);
+ gen_helper_fmaddd(d, tcg_env, fone, s1, s2, mone, op);
}
static void gen_op_fpexception_im(DisasContext *dc, int ftt)
--
2.43.0
* Re: [PATCH 03/17] target/sparc: Use float*_muladd_scalbn
From: Philippe Mathieu-Daudé @ 2024-12-09 16:06 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
On 8/12/24 23:48, Richard Henderson wrote:
> Use the scalbn interface instead of float_muladd_halve_result.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/sparc/helper.h | 4 +-
> target/sparc/fop_helper.c | 8 ++--
> target/sparc/translate.c | 80 +++++++++++++++++++++++----------------
> 3 files changed, 54 insertions(+), 38 deletions(-)
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
* [PATCH 04/17] softfloat: Remove float_muladd_halve_result
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
All uses have been converted to float*_muladd_scalbn.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat.h | 3 ---
fpu/softfloat.c | 6 ------
fpu/softfloat-parts.c.inc | 4 ----
3 files changed, 13 deletions(-)
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index c34ce0477d..aa69aecfb0 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -120,14 +120,11 @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
| Using these differs from negating an input or output before calling
| the muladd function in that this means that a NaN doesn't have its
| sign bit inverted before it is propagated.
-| We also support halving the result before rounding, as a special
-| case to support the ARM fused-sqrt-step instruction FRSQRTS.
*----------------------------------------------------------------------------*/
enum {
float_muladd_negate_c = 1,
float_muladd_negate_product = 2,
float_muladd_negate_result = 4,
- float_muladd_halve_result = 8,
};
/*----------------------------------------------------------------------------
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index a4174de692..81e7a7524b 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -2274,9 +2274,6 @@ float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
if (unlikely(!can_use_fpu(s))) {
goto soft;
}
- if (unlikely(flags & float_muladd_halve_result)) {
- goto soft;
- }
float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
if (unlikely(!f32_is_zon3(ua, ub, uc))) {
@@ -2345,9 +2342,6 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
if (unlikely(!can_use_fpu(s))) {
goto soft;
}
- if (unlikely(flags & float_muladd_halve_result)) {
- goto soft;
- }
float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
if (unlikely(!f64_is_zon3(ua, ub, uc))) {
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 5133358878..5b5969725b 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -567,10 +567,6 @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
a->exp = p_widen.exp;
return_normal:
- /* TODO: Replace all use of float_muladd_halve_result with scale. */
- if (flags & float_muladd_halve_result) {
- a->exp -= 1;
- }
a->exp += scale;
finish_sign:
if (flags & float_muladd_negate_result) {
--
2.43.0
* [PATCH 05/17] softfloat: Add float_round_nearest_even_max
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
This rounding mode is used by Hexagon.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat-types.h | 2 ++
fpu/softfloat-parts.c.inc | 3 +++
2 files changed, 5 insertions(+)
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index 8f39691dfd..c6429665ce 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -138,6 +138,8 @@ typedef enum __attribute__((__packed__)) {
float_round_to_odd = 5,
/* Not an IEEE rounding mode: round to closest odd, overflow to inf */
float_round_to_odd_inf = 6,
+ /* Not an IEEE rounding mode: round to nearest even, overflow to max */
+ float_round_nearest_even_max = 7,
} FloatRoundMode;
/*
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 5b5969725b..2b6adeef4c 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -155,6 +155,9 @@ static void partsN(uncanon_normal)(FloatPartsN *p, float_status *s,
int exp, flags = 0;
switch (s->float_rounding_mode) {
+ case float_round_nearest_even_max:
+ overflow_norm = true;
+ /* fall through */
case float_round_nearest_even:
if (N > 64 && frac_lsb == 0) {
inc = ((p->frac_hi & 1) || (p->frac_lo & round_mask) != frac_lsbm1
--
2.43.0
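A hedged sketch of the intended behaviour (not part of the patch; it assumes QEMU's softfloat rounding-mode accessors): under this rounding mode a result that would overflow is rounded to the largest finite value of the format rather than to infinity:

    /* Fused multiply-add that saturates to the format maximum on overflow. */
    static float32 fma_sat_to_max(float32 a, float32 b, float32 c,
                                  float_status *st)
    {
        FloatRoundMode old = get_float_rounding_mode(st);
        float32 r;

        set_float_rounding_mode(float_round_nearest_even_max, st);
        r = float32_muladd(a, b, c, 0, st);  /* overflow -> largest finite float32 */
        set_float_rounding_mode(old, st);
        return r;
    }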
* [PATCH 06/17] softfloat: Add float_muladd_suppress_add_product_zero
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
Certain Hexagon instructions suppress changes to the result
when the product of fma() is a true zero.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/fpu/softfloat.h | 5 +++++
fpu/softfloat-parts.c.inc | 4 +++-
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index aa69aecfb0..09a40b4310 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -120,11 +120,16 @@ bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
| Using these differs from negating an input or output before calling
| the muladd function in that this means that a NaN doesn't have its
| sign bit inverted before it is propagated.
+|
+| With float_muladd_suppress_add_product_zero, if A or B is zero
+| such that the product is a true zero, then return C without addition.
+| This preserves the sign of C when C is +/- 0. Used for Hexagon.
*----------------------------------------------------------------------------*/
enum {
float_muladd_negate_c = 1,
float_muladd_negate_product = 2,
float_muladd_negate_result = 4,
+ float_muladd_suppress_add_product_zero = 8,
};
/*----------------------------------------------------------------------------
diff --git a/fpu/softfloat-parts.c.inc b/fpu/softfloat-parts.c.inc
index 2b6adeef4c..015b272e30 100644
--- a/fpu/softfloat-parts.c.inc
+++ b/fpu/softfloat-parts.c.inc
@@ -529,7 +529,9 @@ static FloatPartsN *partsN(muladd_scalbn)(FloatPartsN *a, FloatPartsN *b,
goto return_normal;
}
if (c->cls == float_class_zero) {
- if (a->sign != c->sign) {
+ if (flags & float_muladd_suppress_add_product_zero) {
+ a->sign = c->sign;
+ } else if (a->sign != c->sign) {
goto return_sub_zero;
}
goto return_zero;
--
2.43.0
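A small sketch of the semantics documented above (not part of the patch): when the product is a true zero, the addend is returned without performing the addition, so the sign of a zero C is preserved where the usual (+0) + (-0) -> +0 folding would lose it:

    /* With the flag, 0 * x + (-0) yields -0 (x assumed finite);
     * without it, (+0) + (-0) rounds to +0 under round-to-nearest. */
    static float32 keep_addend_sign(float32 x, float_status *st)
    {
        float32 neg_zero = float32_set_sign(float32_zero, 1);

        return float32_muladd(float32_zero, x, neg_zero,
                              float_muladd_suppress_add_product_zero, st);
    }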
* [PATCH 07/17] target/hexagon: Use float32_mul in helper_sfmpy
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
There are no special cases for this instruction.
Remove internal_mpyf as unused.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/hexagon/fma_emu.h | 1 -
target/hexagon/fma_emu.c | 8 --------
target/hexagon/op_helper.c | 2 +-
3 files changed, 1 insertion(+), 10 deletions(-)
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
index 91591d6050..ad5df5d038 100644
--- a/target/hexagon/fma_emu.h
+++ b/target/hexagon/fma_emu.h
@@ -32,7 +32,6 @@ int32_t float32_getexp(float32 f32);
float32 infinite_float32(uint8_t sign);
float32 internal_fmafx(float32 a, float32 b, float32 c,
int scale, float_status *fp_status);
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status);
float64 internal_mpyhh(float64 a, float64 b,
unsigned long long int accumulated,
float_status *fp_status);
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index 05a56d8c10..35971b8b99 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -655,14 +655,6 @@ float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
return accum_round_float32(result, fp_status);
}
-float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
-{
- if (float32_is_zero(a) || float32_is_zero(b)) {
- return float32_mul(a, b, fp_status);
- }
- return internal_fmafx(a, b, float32_zero, 0, fp_status);
-}
-
float64 internal_mpyhh(float64 a, float64 b,
unsigned long long int accumulated,
float_status *fp_status)
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 90e7aaa097..b8b556f4c6 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -1157,7 +1157,7 @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
{
float32 RdV;
arch_fpop_start(env);
- RdV = internal_mpyf(RsV, RtV, &env->fp_status);
+ RdV = float32_mul(RsV, RtV, &env->fp_status);
arch_fpop_end(env);
return RdV;
}
--
2.43.0
* Re: [PATCH 07/17] target/hexagon: Use float32_mul in helper_sfmpy
From: Brian Cain @ 2024-12-10 1:45 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
On 12/8/2024 4:48 PM, Richard Henderson wrote:
> There are no special cases for this instruction.
> Remove internal_mpyf as unused.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/hexagon/fma_emu.h | 1 -
> target/hexagon/fma_emu.c | 8 --------
> target/hexagon/op_helper.c | 2 +-
> 3 files changed, 1 insertion(+), 10 deletions(-)
>
> diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
> index 91591d6050..ad5df5d038 100644
> --- a/target/hexagon/fma_emu.h
> +++ b/target/hexagon/fma_emu.h
> @@ -32,7 +32,6 @@ int32_t float32_getexp(float32 f32);
> float32 infinite_float32(uint8_t sign);
> float32 internal_fmafx(float32 a, float32 b, float32 c,
> int scale, float_status *fp_status);
> -float32 internal_mpyf(float32 a, float32 b, float_status *fp_status);
> float64 internal_mpyhh(float64 a, float64 b,
> unsigned long long int accumulated,
> float_status *fp_status);
> diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
> index 05a56d8c10..35971b8b99 100644
> --- a/target/hexagon/fma_emu.c
> +++ b/target/hexagon/fma_emu.c
> @@ -655,14 +655,6 @@ float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
> return accum_round_float32(result, fp_status);
> }
>
> -float32 internal_mpyf(float32 a, float32 b, float_status *fp_status)
> -{
> - if (float32_is_zero(a) || float32_is_zero(b)) {
> - return float32_mul(a, b, fp_status);
> - }
> - return internal_fmafx(a, b, float32_zero, 0, fp_status);
> -}
> -
> float64 internal_mpyhh(float64 a, float64 b,
> unsigned long long int accumulated,
> float_status *fp_status)
> diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
> index 90e7aaa097..b8b556f4c6 100644
> --- a/target/hexagon/op_helper.c
> +++ b/target/hexagon/op_helper.c
> @@ -1157,7 +1157,7 @@ float32 HELPER(sfmpy)(CPUHexagonState *env, float32 RsV, float32 RtV)
> {
> float32 RdV;
> arch_fpop_start(env);
> - RdV = internal_mpyf(RsV, RtV, &env->fp_status);
> + RdV = float32_mul(RsV, RtV, &env->fp_status);
> arch_fpop_end(env);
> return RdV;
> }
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
* [PATCH 08/17] target/hexagon: Use float32_muladd for helper_sffma
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
There are no special cases for this instruction.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/hexagon/op_helper.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index b8b556f4c6..7d459cc6f3 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -1166,7 +1166,7 @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV)
{
arch_fpop_start(env);
- RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
+ RxV = float32_muladd(RsV, RtV, RxV, 0, &env->fp_status);
arch_fpop_end(env);
return RxV;
}
--
2.43.0
* Re: [PATCH 08/17] target/hexagon: Use float32_muladd for helper_sffma
From: Brian Cain @ 2024-12-10 1:46 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell, mark.cave-ayland, brian.cain
On 12/8/2024 4:48 PM, Richard Henderson wrote:
> There are no special cases for this instruction.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/hexagon/op_helper.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
> index b8b556f4c6..7d459cc6f3 100644
> --- a/target/hexagon/op_helper.c
> +++ b/target/hexagon/op_helper.c
> @@ -1166,7 +1166,7 @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
> float32 RsV, float32 RtV)
> {
> arch_fpop_start(env);
> - RxV = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
> + RxV = float32_muladd(RsV, RtV, RxV, 0, &env->fp_status);
> arch_fpop_end(env);
> return RxV;
> }
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
* [PATCH 09/17] target/hexagon: Use float32_muladd for helper_sffms
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
There are no special cases for this instruction. Since hexagon
always uses default-nan mode, explicitly negating the first
input is unnecessary. Use float_muladd_negate_product instead.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/hexagon/op_helper.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index 7d459cc6f3..aa5ab4a31f 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -1208,10 +1208,9 @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV)
{
- float32 neg_RsV;
arch_fpop_start(env);
- neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
- RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
+ RxV = float32_muladd(RsV, RtV, RxV, float_muladd_negate_product,
+ &env->fp_status);
arch_fpop_end(env);
return RxV;
}
--
2.43.0
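For context, a sketch of the equivalence relied on here (not part of the patch): the two forms below differ only in the sign carried by a propagated NaN, which default-NaN mode, always enabled on Hexagon, discards anyway:

    /* Both compute -(Rs * Rt) + Rx with one rounding. */
    static float32 fms_via_flag(float32 Rx, float32 Rs, float32 Rt,
                                float_status *st)
    {
        return float32_muladd(Rs, Rt, Rx, float_muladd_negate_product, st);
    }

    static float32 fms_via_input_negate(float32 Rx, float32 Rs, float32 Rt,
                                        float_status *st)
    {
        return float32_muladd(float32_chs(Rs), Rt, Rx, 0, st);
    }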
* Re: [PATCH 09/17] target/hexagon: Use float32_muladd for helper_sffms
From: Brian Cain @ 2024-12-10 1:48 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell, mark.cave-ayland, Brian Cain
On 12/8/2024 4:48 PM, Richard Henderson wrote:
> There are no special cases for this instruction. Since hexagon
> always uses default-nan mode, explicitly negating the first
> input is unnecessary. Use float_muladd_negate_product instead.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/hexagon/op_helper.c | 5 ++---
> 1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
> index 7d459cc6f3..aa5ab4a31f 100644
> --- a/target/hexagon/op_helper.c
> +++ b/target/hexagon/op_helper.c
> @@ -1208,10 +1208,9 @@ float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
> float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
> float32 RsV, float32 RtV)
> {
> - float32 neg_RsV;
> arch_fpop_start(env);
> - neg_RsV = float32_set_sign(RsV, float32_is_neg(RsV) ? 0 : 1);
> - RxV = internal_fmafx(neg_RsV, RtV, RxV, 0, &env->fp_status);
> + RxV = float32_muladd(RsV, RtV, RxV, float_muladd_negate_product,
> + &env->fp_status);
> arch_fpop_end(env);
> return RxV;
> }
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
* [PATCH 10/17] target/hexagon: Use float32_muladd_scalbn for helper_sffma_sc
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
This instruction has a special case that 0 * x + c returns c
without the normal sign folding that comes with 0 + -0.
Use the new float_muladd_suppress_add_product_zero to
describe this.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/hexagon/op_helper.c | 11 +++--------
1 file changed, 3 insertions(+), 8 deletions(-)
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index aa5ab4a31f..eb010422bf 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -1192,15 +1192,10 @@ static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV, float32 PuV)
{
- size4s_t tmp;
arch_fpop_start(env);
- RxV = check_nan(RxV, RxV, &env->fp_status);
- RxV = check_nan(RxV, RsV, &env->fp_status);
- RxV = check_nan(RxV, RtV, &env->fp_status);
- tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
- RxV = tmp;
- }
+ RxV = float32_muladd_scalbn(RsV, RtV, RxV, fSXTN(8, 64, PuV),
+ float_muladd_suppress_add_product_zero,
+ &env->fp_status);
arch_fpop_end(env);
return RxV;
}
--
2.43.0
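A rough sketch of what the helper now computes (not part of the patch; it assumes fSXTN(8, 64, PuV) sign-extends the low 8 bits of PuV): the predicate register supplies a small power-of-two scale for the fused result, while the new flag keeps Rx unchanged when the product is a true zero:

    /* Roughly: Rx = (Rs * Rt + Rx) * 2**sext8(Pu), with 0 * x + Rx -> Rx. */
    static float32 sffma_sc_sketch(float32 RxV, float32 RsV, float32 RtV,
                                   uint32_t PuV, float_status *st)
    {
        int scale = (int8_t)PuV;  /* assumed equivalent of fSXTN(8, 64, PuV) */

        return float32_muladd_scalbn(RsV, RtV, RxV, scale,
                                     float_muladd_suppress_add_product_zero,
                                     st);
    }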
* Re: [PATCH 10/17] target/hexagon: Use float32_muladd_scalbn for helper_sffma_sc
From: Brian Cain @ 2024-12-10 2:01 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell, mark.cave-ayland, Brian Cain
On 12/8/2024 4:48 PM, Richard Henderson wrote:
> This instruction has a special case that 0 * x + c returns c
> without the normal sign folding that comes with 0 + -0.
> Use the new float_muladd_suppress_add_product_zero to
> describe this.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/hexagon/op_helper.c | 11 +++--------
> 1 file changed, 3 insertions(+), 8 deletions(-)
>
> diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
> index aa5ab4a31f..eb010422bf 100644
> --- a/target/hexagon/op_helper.c
> +++ b/target/hexagon/op_helper.c
> @@ -1192,15 +1192,10 @@ static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
> float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
> float32 RsV, float32 RtV, float32 PuV)
> {
> - size4s_t tmp;
> arch_fpop_start(env);
> - RxV = check_nan(RxV, RxV, &env->fp_status);
> - RxV = check_nan(RxV, RsV, &env->fp_status);
> - RxV = check_nan(RxV, RtV, &env->fp_status);
> - tmp = internal_fmafx(RsV, RtV, RxV, fSXTN(8, 64, PuV), &env->fp_status);
> - if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
> - RxV = tmp;
> - }
> + RxV = float32_muladd_scalbn(RsV, RtV, RxV, fSXTN(8, 64, PuV),
> + float_muladd_suppress_add_product_zero,
> + &env->fp_status);
> arch_fpop_end(env);
> return RxV;
> }
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
* [PATCH 11/17] target/hexagon: Use float32_muladd for helper_sffm[as]_lib
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
There are multiple special cases for this instruction.
(1) Saturation to the maximum normal value instead of overflow to infinity is
handled by the new float_round_nearest_even_max rounding mode.
(2) The 0 * n + c special case is handled by the new
float_muladd_suppress_add_product_zero flag.
(3) The Inf - Inf -> 0 special case can be detected after the fact
by examining float_flag_invalid_isi.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/hexagon/op_helper.c | 105 +++++++++----------------------------
1 file changed, 26 insertions(+), 79 deletions(-)
diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
index eb010422bf..26e329f7b9 100644
--- a/target/hexagon/op_helper.c
+++ b/target/hexagon/op_helper.c
@@ -1171,24 +1171,6 @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
return RxV;
}
-static bool is_zero_prod(float32 a, float32 b)
-{
- return ((float32_is_zero(a) && is_finite(b)) ||
- (float32_is_zero(b) && is_finite(a)));
-}
-
-static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
-{
- float32 ret = dst;
- if (float32_is_any_nan(x)) {
- if (extract32(x, 22, 1) == 0) {
- float_raise(float_flag_invalid, fp_status);
- }
- ret = make_float32(0xffffffff); /* nan */
- }
- return ret;
-}
-
float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV, float32 PuV)
{
@@ -1210,78 +1192,43 @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
return RxV;
}
-static bool is_inf_prod(int32_t a, int32_t b)
+static float32 do_sffma_lib(CPUHexagonState *env, float32 RxV,
+ float32 RsV, float32 RtV, int negate)
{
- return (float32_is_infinity(a) && float32_is_infinity(b)) ||
- (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
- (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
+ int flags;
+
+ arch_fpop_start(env);
+
+ set_float_rounding_mode(float_round_nearest_even_max, &env->fp_status);
+ RxV = float32_muladd(RsV, RtV, RxV,
+ negate | float_muladd_suppress_add_product_zero,
+ &env->fp_status);
+
+ flags = get_float_exception_flags(&env->fp_status);
+ if (flags) {
+ /* Flags are suppressed by this instruction. */
+ set_float_exception_flags(0, &env->fp_status);
+
+ /* Return 0 for Inf - Inf. */
+ if (flags & float_flag_invalid_isi) {
+ RxV = 0;
+ }
+ }
+
+ arch_fpop_end(env);
+ return RxV;
}
float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV)
{
- bool infinp;
- bool infminusinf;
- float32 tmp;
-
- arch_fpop_start(env);
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
- infminusinf = float32_is_infinity(RxV) &&
- is_inf_prod(RsV, RtV) &&
- (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
- infinp = float32_is_infinity(RxV) ||
- float32_is_infinity(RtV) ||
- float32_is_infinity(RsV);
- RxV = check_nan(RxV, RxV, &env->fp_status);
- RxV = check_nan(RxV, RsV, &env->fp_status);
- RxV = check_nan(RxV, RtV, &env->fp_status);
- tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
- RxV = tmp;
- }
- set_float_exception_flags(0, &env->fp_status);
- if (float32_is_infinity(RxV) && !infinp) {
- RxV = RxV - 1;
- }
- if (infminusinf) {
- RxV = 0;
- }
- arch_fpop_end(env);
- return RxV;
+ return do_sffma_lib(env, RxV, RsV, RtV, 0);
}
float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
float32 RsV, float32 RtV)
{
- bool infinp;
- bool infminusinf;
- float32 tmp;
-
- arch_fpop_start(env);
- set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
- infminusinf = float32_is_infinity(RxV) &&
- is_inf_prod(RsV, RtV) &&
- (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
- infinp = float32_is_infinity(RxV) ||
- float32_is_infinity(RtV) ||
- float32_is_infinity(RsV);
- RxV = check_nan(RxV, RxV, &env->fp_status);
- RxV = check_nan(RxV, RsV, &env->fp_status);
- RxV = check_nan(RxV, RtV, &env->fp_status);
- float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
- tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
- if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
- RxV = tmp;
- }
- set_float_exception_flags(0, &env->fp_status);
- if (float32_is_infinity(RxV) && !infinp) {
- RxV = RxV - 1;
- }
- if (infminusinf) {
- RxV = 0;
- }
- arch_fpop_end(env);
- return RxV;
+ return do_sffma_lib(env, RxV, RsV, RtV, float_muladd_negate_product);
}
float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
--
2.43.0
* Re: [PATCH 11/17] target/hexagon: Use float32_muladd for helper_sffm[as]_lib
From: Brian Cain @ 2024-12-10 15:29 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell, mark.cave-ayland, Brian Cain
On 12/8/2024 4:48 PM, Richard Henderson wrote:
> There are multiple special cases for this instruction.
> (1) Saturation to the maximum normal value instead of overflow to infinity is
> handled by the new float_round_nearest_even_max rounding mode.
> (2) The 0 * n + c special case is handled by the new
> float_muladd_suppress_add_product_zero flag.
> (3) The Inf - Inf -> 0 special case can be detected after the fact
> by examining float_flag_invalid_isi.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/hexagon/op_helper.c | 105 +++++++++----------------------------
> 1 file changed, 26 insertions(+), 79 deletions(-)
>
> diff --git a/target/hexagon/op_helper.c b/target/hexagon/op_helper.c
> index eb010422bf..26e329f7b9 100644
> --- a/target/hexagon/op_helper.c
> +++ b/target/hexagon/op_helper.c
> @@ -1171,24 +1171,6 @@ float32 HELPER(sffma)(CPUHexagonState *env, float32 RxV,
> return RxV;
> }
>
> -static bool is_zero_prod(float32 a, float32 b)
> -{
> - return ((float32_is_zero(a) && is_finite(b)) ||
> - (float32_is_zero(b) && is_finite(a)));
> -}
> -
> -static float32 check_nan(float32 dst, float32 x, float_status *fp_status)
> -{
> - float32 ret = dst;
> - if (float32_is_any_nan(x)) {
> - if (extract32(x, 22, 1) == 0) {
> - float_raise(float_flag_invalid, fp_status);
> - }
> - ret = make_float32(0xffffffff); /* nan */
> - }
> - return ret;
> -}
> -
> float32 HELPER(sffma_sc)(CPUHexagonState *env, float32 RxV,
> float32 RsV, float32 RtV, float32 PuV)
> {
> @@ -1210,78 +1192,43 @@ float32 HELPER(sffms)(CPUHexagonState *env, float32 RxV,
> return RxV;
> }
>
> -static bool is_inf_prod(int32_t a, int32_t b)
> +static float32 do_sffma_lib(CPUHexagonState *env, float32 RxV,
> + float32 RsV, float32 RtV, int negate)
> {
> - return (float32_is_infinity(a) && float32_is_infinity(b)) ||
> - (float32_is_infinity(a) && is_finite(b) && !float32_is_zero(b)) ||
> - (float32_is_infinity(b) && is_finite(a) && !float32_is_zero(a));
> + int flags;
> +
> + arch_fpop_start(env);
> +
> + set_float_rounding_mode(float_round_nearest_even_max, &env->fp_status);
> + RxV = float32_muladd(RsV, RtV, RxV,
> + negate | float_muladd_suppress_add_product_zero,
> + &env->fp_status);
> +
> + flags = get_float_exception_flags(&env->fp_status);
> + if (flags) {
> + /* Flags are suppressed by this instruction. */
> + set_float_exception_flags(0, &env->fp_status);
> +
> + /* Return 0 for Inf - Inf. */
> + if (flags & float_flag_invalid_isi) {
> + RxV = 0;
> + }
> + }
> +
> + arch_fpop_end(env);
> + return RxV;
> }
>
> float32 HELPER(sffma_lib)(CPUHexagonState *env, float32 RxV,
> float32 RsV, float32 RtV)
> {
> - bool infinp;
> - bool infminusinf;
> - float32 tmp;
> -
> - arch_fpop_start(env);
> - set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
> - infminusinf = float32_is_infinity(RxV) &&
> - is_inf_prod(RsV, RtV) &&
> - (fGETBIT(31, RsV ^ RxV ^ RtV) != 0);
> - infinp = float32_is_infinity(RxV) ||
> - float32_is_infinity(RtV) ||
> - float32_is_infinity(RsV);
> - RxV = check_nan(RxV, RxV, &env->fp_status);
> - RxV = check_nan(RxV, RsV, &env->fp_status);
> - RxV = check_nan(RxV, RtV, &env->fp_status);
> - tmp = internal_fmafx(RsV, RtV, RxV, 0, &env->fp_status);
> - if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
> - RxV = tmp;
> - }
> - set_float_exception_flags(0, &env->fp_status);
> - if (float32_is_infinity(RxV) && !infinp) {
> - RxV = RxV - 1;
> - }
> - if (infminusinf) {
> - RxV = 0;
> - }
> - arch_fpop_end(env);
> - return RxV;
> + return do_sffma_lib(env, RxV, RsV, RtV, 0);
> }
>
> float32 HELPER(sffms_lib)(CPUHexagonState *env, float32 RxV,
> float32 RsV, float32 RtV)
> {
> - bool infinp;
> - bool infminusinf;
> - float32 tmp;
> -
> - arch_fpop_start(env);
> - set_float_rounding_mode(float_round_nearest_even, &env->fp_status);
> - infminusinf = float32_is_infinity(RxV) &&
> - is_inf_prod(RsV, RtV) &&
> - (fGETBIT(31, RsV ^ RxV ^ RtV) == 0);
> - infinp = float32_is_infinity(RxV) ||
> - float32_is_infinity(RtV) ||
> - float32_is_infinity(RsV);
> - RxV = check_nan(RxV, RxV, &env->fp_status);
> - RxV = check_nan(RxV, RsV, &env->fp_status);
> - RxV = check_nan(RxV, RtV, &env->fp_status);
> - float32 minus_RsV = float32_sub(float32_zero, RsV, &env->fp_status);
> - tmp = internal_fmafx(minus_RsV, RtV, RxV, 0, &env->fp_status);
> - if (!(float32_is_zero(RxV) && is_zero_prod(RsV, RtV))) {
> - RxV = tmp;
> - }
> - set_float_exception_flags(0, &env->fp_status);
> - if (float32_is_infinity(RxV) && !infinp) {
> - RxV = RxV - 1;
> - }
> - if (infminusinf) {
> - RxV = 0;
> - }
> - arch_fpop_end(env);
> - return RxV;
> + return do_sffma_lib(env, RxV, RsV, RtV, float_muladd_negate_product);
> }
>
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
> float64 HELPER(dfmpyfix)(CPUHexagonState *env, float64 RssV, float64 RttV)
* [PATCH 12/17] target/hexagon: Remove internal_fmafx
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
The function is now unused.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/hexagon/fma_emu.h | 2 -
target/hexagon/fma_emu.c | 171 ---------------------------------------
2 files changed, 173 deletions(-)
diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
index ad5df5d038..fed054b609 100644
--- a/target/hexagon/fma_emu.h
+++ b/target/hexagon/fma_emu.h
@@ -30,8 +30,6 @@ static inline uint32_t float32_getexp_raw(float32 f32)
}
int32_t float32_getexp(float32 f32);
float32 infinite_float32(uint8_t sign);
-float32 internal_fmafx(float32 a, float32 b, float32 c,
- int scale, float_status *fp_status);
float64 internal_mpyhh(float64 a, float64 b,
unsigned long long int accumulated,
float_status *fp_status);
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index 35971b8b99..0c7c7f636c 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -90,21 +90,6 @@ int32_t float64_getexp(float64 f64)
return -1;
}
-static uint64_t float32_getmant(float32 f32)
-{
- Float a = { .i = f32 };
- if (float32_is_normal(f32)) {
- return a.mant | 1ULL << 23;
- }
- if (float32_is_zero(f32)) {
- return 0;
- }
- if (float32_is_denormal(f32)) {
- return a.mant;
- }
- return ~0ULL;
-}
-
int32_t float32_getexp(float32 f32)
{
Float a = { .i = f32 };
@@ -369,25 +354,6 @@ float32 infinite_float32(uint8_t sign)
}
/* Return a maximum finite value with the requested sign */
-static float32 maxfinite_float32(uint8_t sign)
-{
- if (sign) {
- return make_float32(SF_MINUS_MAXF);
- } else {
- return make_float32(SF_MAXF);
- }
-}
-
-/* Return a zero value with requested sign */
-static float32 zero_float32(uint8_t sign)
-{
- if (sign) {
- return make_float32(0x80000000);
- } else {
- return float32_zero;
- }
-}
-
#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
{ \
@@ -517,143 +483,6 @@ static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
}
GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
-GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
-
-static bool is_inf_prod(float64 a, float64 b)
-{
- return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
- (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
- (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
-}
-
-static float64 special_fma(float64 a, float64 b, float64 c,
- float_status *fp_status)
-{
- float64 ret = make_float64(0);
-
- /*
- * If A multiplied by B is an exact infinity and C is also an infinity
- * but with the opposite sign, FMA returns NaN and raises invalid.
- */
- uint8_t a_sign = float64_is_neg(a);
- uint8_t b_sign = float64_is_neg(b);
- uint8_t c_sign = float64_is_neg(c);
- if (is_inf_prod(a, b) && float64_is_infinity(c)) {
- if ((a_sign ^ b_sign) != c_sign) {
- ret = make_float64(DF_NAN);
- float_raise(float_flag_invalid, fp_status);
- return ret;
- }
- }
- if ((float64_is_infinity(a) && float64_is_zero(b)) ||
- (float64_is_zero(a) && float64_is_infinity(b))) {
- ret = make_float64(DF_NAN);
- float_raise(float_flag_invalid, fp_status);
- return ret;
- }
- /*
- * If none of the above checks are true and C is a NaN,
- * a NaN shall be returned
- * If A or B are NaN, a NAN shall be returned.
- */
- if (float64_is_any_nan(a) ||
- float64_is_any_nan(b) ||
- float64_is_any_nan(c)) {
- if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
- float_raise(float_flag_invalid, fp_status);
- }
- if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
- float_raise(float_flag_invalid, fp_status);
- }
- if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
- float_raise(float_flag_invalid, fp_status);
- }
- ret = make_float64(DF_NAN);
- return ret;
- }
- /*
- * We have checked for adding opposite-signed infinities.
- * Other infinities return infinity with the correct sign
- */
- if (float64_is_infinity(c)) {
- ret = infinite_float64(c_sign);
- return ret;
- }
- if (float64_is_infinity(a) || float64_is_infinity(b)) {
- ret = infinite_float64(a_sign ^ b_sign);
- return ret;
- }
- g_assert_not_reached();
-}
-
-static float32 special_fmaf(float32 a, float32 b, float32 c,
- float_status *fp_status)
-{
- float64 aa, bb, cc;
- aa = float32_to_float64(a, fp_status);
- bb = float32_to_float64(b, fp_status);
- cc = float32_to_float64(c, fp_status);
- return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
-}
-
-float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
- float_status *fp_status)
-{
- Accum prod;
- Accum acc;
- Accum result;
- accum_init(&prod);
- accum_init(&acc);
- accum_init(&result);
-
- uint8_t a_sign = float32_is_neg(a);
- uint8_t b_sign = float32_is_neg(b);
- uint8_t c_sign = float32_is_neg(c);
- if (float32_is_infinity(a) ||
- float32_is_infinity(b) ||
- float32_is_infinity(c)) {
- return special_fmaf(a, b, c, fp_status);
- }
- if (float32_is_any_nan(a) ||
- float32_is_any_nan(b) ||
- float32_is_any_nan(c)) {
- return special_fmaf(a, b, c, fp_status);
- }
- if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
- float32 tmp = float32_mul(a, b, fp_status);
- tmp = float32_add(tmp, c, fp_status);
- return tmp;
- }
-
- /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
- prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
-
- /*
- * Note: extracting the mantissa into an int is multiplying by
- * 2**23, so adjust here
- */
- prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
- prod.sign = a_sign ^ b_sign;
- if (float32_is_zero(a) || float32_is_zero(b)) {
- prod.exp = -2 * WAY_BIG_EXP;
- }
- if ((scale > 0) && float32_is_denormal(c)) {
- acc.mant = int128_mul_6464(0, 0);
- acc.exp = -WAY_BIG_EXP;
- acc.sign = c_sign;
- acc.sticky = 1;
- result = accum_add(prod, acc);
- } else if (!float32_is_zero(c)) {
- acc.mant = int128_mul_6464(float32_getmant(c), 1);
- acc.exp = float32_getexp(c);
- acc.sign = c_sign;
- result = accum_add(prod, acc);
- } else {
- result = prod;
- }
- result.exp += scale;
- return accum_round_float32(result, fp_status);
-}
float64 internal_mpyhh(float64 a, float64 b,
unsigned long long int accumulated,
--
2.43.0
* Re: [PATCH 12/17] target/hexagon: Remove internal_fmafx
2024-12-08 22:48 ` [PATCH 12/17] target/hexagon: Remove internal_fmafx Richard Henderson
@ 2024-12-10 21:52 ` Brian Cain
0 siblings, 0 replies; 33+ messages in thread
From: Brian Cain @ 2024-12-10 21:52 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell, mark.cave-ayland, Brian Cain
On 12/8/2024 4:48 PM, Richard Henderson wrote:
> The function is now unused.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/hexagon/fma_emu.h | 2 -
> target/hexagon/fma_emu.c | 171 ---------------------------------------
> 2 files changed, 173 deletions(-)
>
> diff --git a/target/hexagon/fma_emu.h b/target/hexagon/fma_emu.h
> index ad5df5d038..fed054b609 100644
> --- a/target/hexagon/fma_emu.h
> +++ b/target/hexagon/fma_emu.h
> @@ -30,8 +30,6 @@ static inline uint32_t float32_getexp_raw(float32 f32)
> }
> int32_t float32_getexp(float32 f32);
> float32 infinite_float32(uint8_t sign);
> -float32 internal_fmafx(float32 a, float32 b, float32 c,
> - int scale, float_status *fp_status);
> float64 internal_mpyhh(float64 a, float64 b,
> unsigned long long int accumulated,
> float_status *fp_status);
> diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
> index 35971b8b99..0c7c7f636c 100644
> --- a/target/hexagon/fma_emu.c
> +++ b/target/hexagon/fma_emu.c
> @@ -90,21 +90,6 @@ int32_t float64_getexp(float64 f64)
> return -1;
> }
>
> -static uint64_t float32_getmant(float32 f32)
> -{
> - Float a = { .i = f32 };
> - if (float32_is_normal(f32)) {
> - return a.mant | 1ULL << 23;
> - }
> - if (float32_is_zero(f32)) {
> - return 0;
> - }
> - if (float32_is_denormal(f32)) {
> - return a.mant;
> - }
> - return ~0ULL;
> -}
> -
> int32_t float32_getexp(float32 f32)
> {
> Float a = { .i = f32 };
> @@ -369,25 +354,6 @@ float32 infinite_float32(uint8_t sign)
> }
>
> /* Return a maximum finite value with the requested sign */
> -static float32 maxfinite_float32(uint8_t sign)
> -{
> - if (sign) {
> - return make_float32(SF_MINUS_MAXF);
> - } else {
> - return make_float32(SF_MAXF);
> - }
> -}
> -
> -/* Return a zero value with requested sign */
> -static float32 zero_float32(uint8_t sign)
> -{
> - if (sign) {
> - return make_float32(0x80000000);
> - } else {
> - return float32_zero;
> - }
> -}
> -
> #define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
> static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
> { \
> @@ -517,143 +483,6 @@ static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
> }
>
> GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
> -GEN_XF_ROUND(float32, SF_MANTBITS, SF_INF_EXP, Float)
> -
> -static bool is_inf_prod(float64 a, float64 b)
> -{
> - return ((float64_is_infinity(a) && float64_is_infinity(b)) ||
> - (float64_is_infinity(a) && is_finite(b) && (!float64_is_zero(b))) ||
> - (float64_is_infinity(b) && is_finite(a) && (!float64_is_zero(a))));
> -}
> -
> -static float64 special_fma(float64 a, float64 b, float64 c,
> - float_status *fp_status)
> -{
> - float64 ret = make_float64(0);
> -
> - /*
> - * If A multiplied by B is an exact infinity and C is also an infinity
> - * but with the opposite sign, FMA returns NaN and raises invalid.
> - */
> - uint8_t a_sign = float64_is_neg(a);
> - uint8_t b_sign = float64_is_neg(b);
> - uint8_t c_sign = float64_is_neg(c);
> - if (is_inf_prod(a, b) && float64_is_infinity(c)) {
> - if ((a_sign ^ b_sign) != c_sign) {
> - ret = make_float64(DF_NAN);
> - float_raise(float_flag_invalid, fp_status);
> - return ret;
> - }
> - }
> - if ((float64_is_infinity(a) && float64_is_zero(b)) ||
> - (float64_is_zero(a) && float64_is_infinity(b))) {
> - ret = make_float64(DF_NAN);
> - float_raise(float_flag_invalid, fp_status);
> - return ret;
> - }
> - /*
> - * If none of the above checks are true and C is a NaN,
> - * a NaN shall be returned
> - * If A or B are NaN, a NAN shall be returned.
> - */
> - if (float64_is_any_nan(a) ||
> - float64_is_any_nan(b) ||
> - float64_is_any_nan(c)) {
> - if (float64_is_any_nan(a) && (fGETBIT(51, a) == 0)) {
> - float_raise(float_flag_invalid, fp_status);
> - }
> - if (float64_is_any_nan(b) && (fGETBIT(51, b) == 0)) {
> - float_raise(float_flag_invalid, fp_status);
> - }
> - if (float64_is_any_nan(c) && (fGETBIT(51, c) == 0)) {
> - float_raise(float_flag_invalid, fp_status);
> - }
> - ret = make_float64(DF_NAN);
> - return ret;
> - }
> - /*
> - * We have checked for adding opposite-signed infinities.
> - * Other infinities return infinity with the correct sign
> - */
> - if (float64_is_infinity(c)) {
> - ret = infinite_float64(c_sign);
> - return ret;
> - }
> - if (float64_is_infinity(a) || float64_is_infinity(b)) {
> - ret = infinite_float64(a_sign ^ b_sign);
> - return ret;
> - }
> - g_assert_not_reached();
> -}
> -
> -static float32 special_fmaf(float32 a, float32 b, float32 c,
> - float_status *fp_status)
> -{
> - float64 aa, bb, cc;
> - aa = float32_to_float64(a, fp_status);
> - bb = float32_to_float64(b, fp_status);
> - cc = float32_to_float64(c, fp_status);
> - return float64_to_float32(special_fma(aa, bb, cc, fp_status), fp_status);
> -}
> -
> -float32 internal_fmafx(float32 a, float32 b, float32 c, int scale,
> - float_status *fp_status)
> -{
> - Accum prod;
> - Accum acc;
> - Accum result;
> - accum_init(&prod);
> - accum_init(&acc);
> - accum_init(&result);
> -
> - uint8_t a_sign = float32_is_neg(a);
> - uint8_t b_sign = float32_is_neg(b);
> - uint8_t c_sign = float32_is_neg(c);
> - if (float32_is_infinity(a) ||
> - float32_is_infinity(b) ||
> - float32_is_infinity(c)) {
> - return special_fmaf(a, b, c, fp_status);
> - }
> - if (float32_is_any_nan(a) ||
> - float32_is_any_nan(b) ||
> - float32_is_any_nan(c)) {
> - return special_fmaf(a, b, c, fp_status);
> - }
> - if ((scale == 0) && (float32_is_zero(a) || float32_is_zero(b))) {
> - float32 tmp = float32_mul(a, b, fp_status);
> - tmp = float32_add(tmp, c, fp_status);
> - return tmp;
> - }
> -
> - /* (a * 2**b) * (c * 2**d) == a*c * 2**(b+d) */
> - prod.mant = int128_mul_6464(float32_getmant(a), float32_getmant(b));
> -
> - /*
> - * Note: extracting the mantissa into an int is multiplying by
> - * 2**23, so adjust here
> - */
> - prod.exp = float32_getexp(a) + float32_getexp(b) - SF_BIAS - 23;
> - prod.sign = a_sign ^ b_sign;
> - if (float32_is_zero(a) || float32_is_zero(b)) {
> - prod.exp = -2 * WAY_BIG_EXP;
> - }
> - if ((scale > 0) && float32_is_denormal(c)) {
> - acc.mant = int128_mul_6464(0, 0);
> - acc.exp = -WAY_BIG_EXP;
> - acc.sign = c_sign;
> - acc.sticky = 1;
> - result = accum_add(prod, acc);
> - } else if (!float32_is_zero(c)) {
> - acc.mant = int128_mul_6464(float32_getmant(c), 1);
> - acc.exp = float32_getexp(c);
> - acc.sign = c_sign;
> - result = accum_add(prod, acc);
> - } else {
> - result = prod;
> - }
> - result.exp += scale;
> - return accum_round_float32(result, fp_status);
> -}
>
> float64 internal_mpyhh(float64 a, float64 b,
> unsigned long long int accumulated,
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
* [PATCH 13/17] target/hexagon: Expand GEN_XF_ROUND
2024-12-08 22:48 [RFC PATCH 00/17] softfloat, hexagon: Cleanup fmaf Richard Henderson
` (11 preceding siblings ...)
2024-12-08 22:48 ` [PATCH 12/17] target/hexagon: Remove internal_fmafx Richard Henderson
@ 2024-12-08 22:48 ` Richard Henderson
2024-12-10 21:53 ` Brian Cain
2024-12-08 22:48 ` [PATCH 14/17] target/hexagon: Remove Float Richard Henderson
` (3 subsequent siblings)
16 siblings, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
This massive macro is now only used once.
Expand it for use only by float64.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/hexagon/fma_emu.c | 253 +++++++++++++++++++--------------------
1 file changed, 125 insertions(+), 128 deletions(-)
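For readers less used to this style of macro, a minimal standalone sketch (GEN_NEG and neg_* are made-up names, not from the tree) of how the ##SUFFIX pasting stamps out one function per type; this patch simply writes the lone float64 instantiation out by hand:

/* Illustrative only: ##SUFFIX builds a distinct function name per use. */
#define GEN_NEG(SUFFIX, TYPE)                               \
    static inline TYPE neg_##SUFFIX(TYPE x) { return -x; }

GEN_NEG(float, float)     /* defines: static inline float  neg_float(float)   */
GEN_NEG(double, double)   /* defines: static inline double neg_double(double) */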
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index 0c7c7f636c..bce3bd4dfb 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -354,136 +354,133 @@ float32 infinite_float32(uint8_t sign)
}
/* Return a maximum finite value with the requested sign */
-#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
-static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
-{ \
- if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
- && ((a.guard | a.round | a.sticky) == 0)) { \
- /* result zero */ \
- switch (fp_status->float_rounding_mode) { \
- case float_round_down: \
- return zero_##SUFFIX(1); \
- default: \
- return zero_##SUFFIX(0); \
- } \
- } \
- /* Normalize right */ \
- /* We want MANTBITS bits of mantissa plus the leading one. */ \
- /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
- /* So we need to normalize right while the high word is non-zero and \
- * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
- while ((int128_gethi(a.mant) != 0) || \
- ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
- a = accum_norm_right(a, 1); \
- } \
- /* \
- * OK, now normalize left \
- * We want to normalize left until we have a leading one in bit 24 \
- * Theoretically, we only need to shift a maximum of one to the left if we \
- * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
- * should be 0 \
- */ \
- while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
- a = accum_norm_left(a); \
- } \
- /* \
- * OK, now we might need to denormalize because of potential underflow. \
- * We need to do this before rounding, and rounding might make us normal \
- * again \
- */ \
- while (a.exp <= 0) { \
- a = accum_norm_right(a, 1 - a.exp); \
- /* \
- * Do we have underflow? \
- * That's when we get an inexact answer because we ran out of bits \
- * in a denormal. \
- */ \
- if (a.guard || a.round || a.sticky) { \
- float_raise(float_flag_underflow, fp_status); \
- } \
- } \
- /* OK, we're relatively canonical... now we need to round */ \
- if (a.guard || a.round || a.sticky) { \
- float_raise(float_flag_inexact, fp_status); \
- switch (fp_status->float_rounding_mode) { \
- case float_round_to_zero: \
- /* Chop and we're done */ \
- break; \
- case float_round_up: \
- if (a.sign == 0) { \
- a.mant = int128_add(a.mant, int128_one()); \
- } \
- break; \
- case float_round_down: \
- if (a.sign != 0) { \
- a.mant = int128_add(a.mant, int128_one()); \
- } \
- break; \
- default: \
- if (a.round || a.sticky) { \
- /* round up if guard is 1, down if guard is zero */ \
- a.mant = int128_add(a.mant, int128_make64(a.guard)); \
- } else if (a.guard) { \
- /* exactly .5, round up if odd */ \
- a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
- } \
- break; \
- } \
- } \
- /* \
- * OK, now we might have carried all the way up. \
- * So we might need to shr once \
- * at least we know that the lsb should be zero if we rounded and \
- * got a carry out... \
- */ \
- if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
- a = accum_norm_right(a, 1); \
- } \
- /* Overflow? */ \
- if (a.exp >= INF_EXP) { \
- /* Yep, inf result */ \
- float_raise(float_flag_overflow, fp_status); \
- float_raise(float_flag_inexact, fp_status); \
- switch (fp_status->float_rounding_mode) { \
- case float_round_to_zero: \
- return maxfinite_##SUFFIX(a.sign); \
- case float_round_up: \
- if (a.sign == 0) { \
- return infinite_##SUFFIX(a.sign); \
- } else { \
- return maxfinite_##SUFFIX(a.sign); \
- } \
- case float_round_down: \
- if (a.sign != 0) { \
- return infinite_##SUFFIX(a.sign); \
- } else { \
- return maxfinite_##SUFFIX(a.sign); \
- } \
- default: \
- return infinite_##SUFFIX(a.sign); \
- } \
- } \
- /* Underflow? */ \
- if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
- /* Leading one means: No, we're normal. So, we should be done... */ \
- INTERNAL_TYPE ret; \
- ret.i = 0; \
- ret.sign = a.sign; \
- ret.exp = a.exp; \
- ret.mant = int128_getlo(a.mant); \
- return ret.i; \
- } \
- assert(a.exp == 1); \
- INTERNAL_TYPE ret; \
- ret.i = 0; \
- ret.sign = a.sign; \
- ret.exp = 0; \
- ret.mant = int128_getlo(a.mant); \
- return ret.i; \
+static float64 accum_round_float64(Accum a, float_status * fp_status)
+{
+ if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
+ && ((a.guard | a.round | a.sticky) == 0)) {
+ /* result zero */
+ switch (fp_status->float_rounding_mode) {
+ case float_round_down:
+ return zero_float64(1);
+ default:
+ return zero_float64(0);
+ }
+ }
+ /* Normalize right */
+ /* We want DF_MANTBITS bits of mantissa plus the leading one. */
+ /* That means that we want DF_MANTBITS+1 bits, or 0x000000000000FF_FFFF */
+ /* So we need to normalize right while the high word is non-zero and
+ * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */
+ while ((int128_gethi(a.mant) != 0) ||
+ ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0)) {
+ a = accum_norm_right(a, 1);
+ }
+ /*
+ * OK, now normalize left
+ * We want to normalize left until we have a leading one in bit 24
+ * Theoretically, we only need to shift a maximum of one to the left if we
+ * shifted out lots of bits from B, or if we had no shift / 1 shift sticky
+ * should be 0
+ */
+ while ((int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) == 0) {
+ a = accum_norm_left(a);
+ }
+ /*
+ * OK, now we might need to denormalize because of potential underflow.
+ * We need to do this before rounding, and rounding might make us normal
+ * again
+ */
+ while (a.exp <= 0) {
+ a = accum_norm_right(a, 1 - a.exp);
+ /*
+ * Do we have underflow?
+ * That's when we get an inexact answer because we ran out of bits
+ * in a denormal.
+ */
+ if (a.guard || a.round || a.sticky) {
+ float_raise(float_flag_underflow, fp_status);
+ }
+ }
+ /* OK, we're relatively canonical... now we need to round */
+ if (a.guard || a.round || a.sticky) {
+ float_raise(float_flag_inexact, fp_status);
+ switch (fp_status->float_rounding_mode) {
+ case float_round_to_zero:
+ /* Chop and we're done */
+ break;
+ case float_round_up:
+ if (a.sign == 0) {
+ a.mant = int128_add(a.mant, int128_one());
+ }
+ break;
+ case float_round_down:
+ if (a.sign != 0) {
+ a.mant = int128_add(a.mant, int128_one());
+ }
+ break;
+ default:
+ if (a.round || a.sticky) {
+ /* round up if guard is 1, down if guard is zero */
+ a.mant = int128_add(a.mant, int128_make64(a.guard));
+ } else if (a.guard) {
+ /* exactly .5, round up if odd */
+ a.mant = int128_add(a.mant, int128_and(a.mant, int128_one()));
+ }
+ break;
+ }
+ }
+ /*
+ * OK, now we might have carried all the way up.
+ * So we might need to shr once
+ * at least we know that the lsb should be zero if we rounded and
+ * got a carry out...
+ */
+ if ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0) {
+ a = accum_norm_right(a, 1);
+ }
+ /* Overflow? */
+ if (a.exp >= DF_INF_EXP) {
+ /* Yep, inf result */
+ float_raise(float_flag_overflow, fp_status);
+ float_raise(float_flag_inexact, fp_status);
+ switch (fp_status->float_rounding_mode) {
+ case float_round_to_zero:
+ return maxfinite_float64(a.sign);
+ case float_round_up:
+ if (a.sign == 0) {
+ return infinite_float64(a.sign);
+ } else {
+ return maxfinite_float64(a.sign);
+ }
+ case float_round_down:
+ if (a.sign != 0) {
+ return infinite_float64(a.sign);
+ } else {
+ return maxfinite_float64(a.sign);
+ }
+ default:
+ return infinite_float64(a.sign);
+ }
+ }
+ /* Underflow? */
+ if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
+ /* Leading one means: No, we're normal. So, we should be done... */
+ Double ret;
+ ret.i = 0;
+ ret.sign = a.sign;
+ ret.exp = a.exp;
+ ret.mant = int128_getlo(a.mant);
+ return ret.i;
+ }
+ assert(a.exp == 1);
+ Double ret;
+ ret.i = 0;
+ ret.sign = a.sign;
+ ret.exp = 0;
+ ret.mant = int128_getlo(a.mant);
+ return ret.i;
}
-GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
-
float64 internal_mpyhh(float64 a, float64 b,
unsigned long long int accumulated,
float_status *fp_status)
--
2.43.0
* Re: [PATCH 13/17] target/hexagon: Expand GEN_XF_ROUND
2024-12-08 22:48 ` [PATCH 13/17] target/hexagon: Expand GEN_XF_ROUND Richard Henderson
@ 2024-12-10 21:53 ` Brian Cain
0 siblings, 0 replies; 33+ messages in thread
From: Brian Cain @ 2024-12-10 21:53 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell, mark.cave-ayland, Brian Cain
On 12/8/2024 4:48 PM, Richard Henderson wrote:
> This massive macro is now only used once.
> Expand it for use only by float64.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/hexagon/fma_emu.c | 253 +++++++++++++++++++--------------------
> 1 file changed, 125 insertions(+), 128 deletions(-)
>
> diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
> index 0c7c7f636c..bce3bd4dfb 100644
> --- a/target/hexagon/fma_emu.c
> +++ b/target/hexagon/fma_emu.c
> @@ -354,136 +354,133 @@ float32 infinite_float32(uint8_t sign)
> }
>
> /* Return a maximum finite value with the requested sign */
> -#define GEN_XF_ROUND(SUFFIX, MANTBITS, INF_EXP, INTERNAL_TYPE) \
> -static SUFFIX accum_round_##SUFFIX(Accum a, float_status * fp_status) \
> -{ \
> - if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0) \
> - && ((a.guard | a.round | a.sticky) == 0)) { \
> - /* result zero */ \
> - switch (fp_status->float_rounding_mode) { \
> - case float_round_down: \
> - return zero_##SUFFIX(1); \
> - default: \
> - return zero_##SUFFIX(0); \
> - } \
> - } \
> - /* Normalize right */ \
> - /* We want MANTBITS bits of mantissa plus the leading one. */ \
> - /* That means that we want MANTBITS+1 bits, or 0x000000000000FF_FFFF */ \
> - /* So we need to normalize right while the high word is non-zero and \
> - * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */ \
> - while ((int128_gethi(a.mant) != 0) || \
> - ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0)) { \
> - a = accum_norm_right(a, 1); \
> - } \
> - /* \
> - * OK, now normalize left \
> - * We want to normalize left until we have a leading one in bit 24 \
> - * Theoretically, we only need to shift a maximum of one to the left if we \
> - * shifted out lots of bits from B, or if we had no shift / 1 shift sticky \
> - * should be 0 \
> - */ \
> - while ((int128_getlo(a.mant) & (1ULL << MANTBITS)) == 0) { \
> - a = accum_norm_left(a); \
> - } \
> - /* \
> - * OK, now we might need to denormalize because of potential underflow. \
> - * We need to do this before rounding, and rounding might make us normal \
> - * again \
> - */ \
> - while (a.exp <= 0) { \
> - a = accum_norm_right(a, 1 - a.exp); \
> - /* \
> - * Do we have underflow? \
> - * That's when we get an inexact answer because we ran out of bits \
> - * in a denormal. \
> - */ \
> - if (a.guard || a.round || a.sticky) { \
> - float_raise(float_flag_underflow, fp_status); \
> - } \
> - } \
> - /* OK, we're relatively canonical... now we need to round */ \
> - if (a.guard || a.round || a.sticky) { \
> - float_raise(float_flag_inexact, fp_status); \
> - switch (fp_status->float_rounding_mode) { \
> - case float_round_to_zero: \
> - /* Chop and we're done */ \
> - break; \
> - case float_round_up: \
> - if (a.sign == 0) { \
> - a.mant = int128_add(a.mant, int128_one()); \
> - } \
> - break; \
> - case float_round_down: \
> - if (a.sign != 0) { \
> - a.mant = int128_add(a.mant, int128_one()); \
> - } \
> - break; \
> - default: \
> - if (a.round || a.sticky) { \
> - /* round up if guard is 1, down if guard is zero */ \
> - a.mant = int128_add(a.mant, int128_make64(a.guard)); \
> - } else if (a.guard) { \
> - /* exactly .5, round up if odd */ \
> - a.mant = int128_add(a.mant, int128_and(a.mant, int128_one())); \
> - } \
> - break; \
> - } \
> - } \
> - /* \
> - * OK, now we might have carried all the way up. \
> - * So we might need to shr once \
> - * at least we know that the lsb should be zero if we rounded and \
> - * got a carry out... \
> - */ \
> - if ((int128_getlo(a.mant) >> (MANTBITS + 1)) != 0) { \
> - a = accum_norm_right(a, 1); \
> - } \
> - /* Overflow? */ \
> - if (a.exp >= INF_EXP) { \
> - /* Yep, inf result */ \
> - float_raise(float_flag_overflow, fp_status); \
> - float_raise(float_flag_inexact, fp_status); \
> - switch (fp_status->float_rounding_mode) { \
> - case float_round_to_zero: \
> - return maxfinite_##SUFFIX(a.sign); \
> - case float_round_up: \
> - if (a.sign == 0) { \
> - return infinite_##SUFFIX(a.sign); \
> - } else { \
> - return maxfinite_##SUFFIX(a.sign); \
> - } \
> - case float_round_down: \
> - if (a.sign != 0) { \
> - return infinite_##SUFFIX(a.sign); \
> - } else { \
> - return maxfinite_##SUFFIX(a.sign); \
> - } \
> - default: \
> - return infinite_##SUFFIX(a.sign); \
> - } \
> - } \
> - /* Underflow? */ \
> - if (int128_getlo(a.mant) & (1ULL << MANTBITS)) { \
> - /* Leading one means: No, we're normal. So, we should be done... */ \
> - INTERNAL_TYPE ret; \
> - ret.i = 0; \
> - ret.sign = a.sign; \
> - ret.exp = a.exp; \
> - ret.mant = int128_getlo(a.mant); \
> - return ret.i; \
> - } \
> - assert(a.exp == 1); \
> - INTERNAL_TYPE ret; \
> - ret.i = 0; \
> - ret.sign = a.sign; \
> - ret.exp = 0; \
> - ret.mant = int128_getlo(a.mant); \
> - return ret.i; \
> +static float64 accum_round_float64(Accum a, float_status * fp_status)
> +{
> + if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
> + && ((a.guard | a.round | a.sticky) == 0)) {
> + /* result zero */
> + switch (fp_status->float_rounding_mode) {
> + case float_round_down:
> + return zero_float64(1);
> + default:
> + return zero_float64(0);
> + }
> + }
> + /* Normalize right */
> + /* We want DF_MANTBITS bits of mantissa plus the leading one. */
> + /* That means that we want DF_MANTBITS+1 bits, or 0x000000000000FF_FFFF */
> + /* So we need to normalize right while the high word is non-zero and
> + * while the low word is nonzero when masked with 0xffe0_0000_0000_0000 */
> + while ((int128_gethi(a.mant) != 0) ||
> + ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0)) {
> + a = accum_norm_right(a, 1);
> + }
> + /*
> + * OK, now normalize left
> + * We want to normalize left until we have a leading one in bit 24
> + * Theoretically, we only need to shift a maximum of one to the left if we
> + * shifted out lots of bits from B, or if we had no shift / 1 shift sticky
> + * should be 0
> + */
> + while ((int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) == 0) {
> + a = accum_norm_left(a);
> + }
> + /*
> + * OK, now we might need to denormalize because of potential underflow.
> + * We need to do this before rounding, and rounding might make us normal
> + * again
> + */
> + while (a.exp <= 0) {
> + a = accum_norm_right(a, 1 - a.exp);
> + /*
> + * Do we have underflow?
> + * That's when we get an inexact answer because we ran out of bits
> + * in a denormal.
> + */
> + if (a.guard || a.round || a.sticky) {
> + float_raise(float_flag_underflow, fp_status);
> + }
> + }
> + /* OK, we're relatively canonical... now we need to round */
> + if (a.guard || a.round || a.sticky) {
> + float_raise(float_flag_inexact, fp_status);
> + switch (fp_status->float_rounding_mode) {
> + case float_round_to_zero:
> + /* Chop and we're done */
> + break;
> + case float_round_up:
> + if (a.sign == 0) {
> + a.mant = int128_add(a.mant, int128_one());
> + }
> + break;
> + case float_round_down:
> + if (a.sign != 0) {
> + a.mant = int128_add(a.mant, int128_one());
> + }
> + break;
> + default:
> + if (a.round || a.sticky) {
> + /* round up if guard is 1, down if guard is zero */
> + a.mant = int128_add(a.mant, int128_make64(a.guard));
> + } else if (a.guard) {
> + /* exactly .5, round up if odd */
> + a.mant = int128_add(a.mant, int128_and(a.mant, int128_one()));
> + }
> + break;
> + }
> + }
> + /*
> + * OK, now we might have carried all the way up.
> + * So we might need to shr once
> + * at least we know that the lsb should be zero if we rounded and
> + * got a carry out...
> + */
> + if ((int128_getlo(a.mant) >> (DF_MANTBITS + 1)) != 0) {
> + a = accum_norm_right(a, 1);
> + }
> + /* Overflow? */
> + if (a.exp >= DF_INF_EXP) {
> + /* Yep, inf result */
> + float_raise(float_flag_overflow, fp_status);
> + float_raise(float_flag_inexact, fp_status);
> + switch (fp_status->float_rounding_mode) {
> + case float_round_to_zero:
> + return maxfinite_float64(a.sign);
> + case float_round_up:
> + if (a.sign == 0) {
> + return infinite_float64(a.sign);
> + } else {
> + return maxfinite_float64(a.sign);
> + }
> + case float_round_down:
> + if (a.sign != 0) {
> + return infinite_float64(a.sign);
> + } else {
> + return maxfinite_float64(a.sign);
> + }
> + default:
> + return infinite_float64(a.sign);
> + }
> + }
> + /* Underflow? */
> + if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
> + /* Leading one means: No, we're normal. So, we should be done... */
> + Double ret;
> + ret.i = 0;
> + ret.sign = a.sign;
> + ret.exp = a.exp;
> + ret.mant = int128_getlo(a.mant);
> + return ret.i;
> + }
> + assert(a.exp == 1);
> + Double ret;
> + ret.i = 0;
> + ret.sign = a.sign;
> + ret.exp = 0;
> + ret.mant = int128_getlo(a.mant);
> + return ret.i;
> }
>
> -GEN_XF_ROUND(float64, DF_MANTBITS, DF_INF_EXP, Double)
> -
> float64 internal_mpyhh(float64 a, float64 b,
> unsigned long long int accumulated,
> float_status *fp_status)
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
* [PATCH 14/17] target/hexagon: Remove Float
2024-12-08 22:48 [RFC PATCH 00/17] softfloat, hexagon: Cleanup fmaf Richard Henderson
` (12 preceding siblings ...)
2024-12-08 22:48 ` [PATCH 13/17] target/hexagon: Expand GEN_XF_ROUND Richard Henderson
@ 2024-12-08 22:48 ` Richard Henderson
2024-12-10 21:54 ` Brian Cain
2024-12-08 22:48 ` [PATCH 15/17] target/hexagon: Remove Double Richard Henderson
` (2 subsequent siblings)
16 siblings, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
This structure, with bitfields, is incorrect for big-endian.
Use the existing float32_getexp_raw which uses extract32.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/hexagon/fma_emu.c | 16 +++-------------
1 file changed, 3 insertions(+), 13 deletions(-)
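A minimal sketch of the endianness point, not part of the patch: extract32() (from "qemu/bitops.h") reads fixed bit positions of the binary32 value, so the result is the same on little- and big-endian hosts, whereas the order of the removed bitfields is compiler/ABI-defined. The sf_*_field helper names below are made up for illustration.

#include "qemu/bitops.h"   /* extract32() */

/* Illustrative only: layout-independent reads of IEEE binary32 fields. */
static inline uint32_t sf_mant_field(uint32_t bits)
{
    return extract32(bits, 0, 23);   /* bits [22:0]  mantissa */
}

static inline uint32_t sf_exp_field(uint32_t bits)
{
    return extract32(bits, 23, 8);   /* bits [30:23] exponent */
}

static inline uint32_t sf_sign_field(uint32_t bits)
{
    return extract32(bits, 31, 1);   /* bit 31       sign     */
}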
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index bce3bd4dfb..c359eecffd 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -53,16 +53,6 @@ typedef union {
};
} Double;
-typedef union {
- float f;
- uint32_t i;
- struct {
- uint32_t mant:23;
- uint32_t exp:8;
- uint32_t sign:1;
- };
-} Float;
-
static uint64_t float64_getmant(float64 f64)
{
Double a = { .i = f64 };
@@ -92,12 +82,12 @@ int32_t float64_getexp(float64 f64)
int32_t float32_getexp(float32 f32)
{
- Float a = { .i = f32 };
+ int exp = float32_getexp_raw(f32);
if (float32_is_normal(f32)) {
- return a.exp;
+ return exp;
}
if (float32_is_denormal(f32)) {
- return a.exp + 1;
+ return exp + 1;
}
return -1;
}
--
2.43.0
* Re: [PATCH 14/17] target/hexagon: Remove Float
2024-12-08 22:48 ` [PATCH 14/17] target/hexagon: Remove Float Richard Henderson
@ 2024-12-10 21:54 ` Brian Cain
0 siblings, 0 replies; 33+ messages in thread
From: Brian Cain @ 2024-12-10 21:54 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell, mark.cave-ayland, Brian Cain
On 12/8/2024 4:48 PM, Richard Henderson wrote:
> This structure, with bitfields, is incorrect for big-endian.
> Use the existing float32_getexp_raw which uses extract32.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/hexagon/fma_emu.c | 16 +++-------------
> 1 file changed, 3 insertions(+), 13 deletions(-)
>
> diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
> index bce3bd4dfb..c359eecffd 100644
> --- a/target/hexagon/fma_emu.c
> +++ b/target/hexagon/fma_emu.c
> @@ -53,16 +53,6 @@ typedef union {
> };
> } Double;
>
> -typedef union {
> - float f;
> - uint32_t i;
> - struct {
> - uint32_t mant:23;
> - uint32_t exp:8;
> - uint32_t sign:1;
> - };
> -} Float;
> -
> static uint64_t float64_getmant(float64 f64)
> {
> Double a = { .i = f64 };
> @@ -92,12 +82,12 @@ int32_t float64_getexp(float64 f64)
>
> int32_t float32_getexp(float32 f32)
> {
> - Float a = { .i = f32 };
> + int exp = float32_getexp_raw(f32);
> if (float32_is_normal(f32)) {
> - return a.exp;
> + return exp;
> }
> if (float32_is_denormal(f32)) {
> - return a.exp + 1;
> + return exp + 1;
> }
> return -1;
> }
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
* [PATCH 15/17] target/hexagon: Remove Double
2024-12-08 22:48 [RFC PATCH 00/17] softfloat, hexagon: Cleanup fmaf Richard Henderson
` (13 preceding siblings ...)
2024-12-08 22:48 ` [PATCH 14/17] target/hexagon: Remove Float Richard Henderson
@ 2024-12-08 22:48 ` Richard Henderson
2024-12-10 21:54 ` Brian Cain
2024-12-08 22:48 ` [PATCH 16/17] target/hexagon: Use mulu64 for int128_mul_6464 Richard Henderson
2024-12-08 22:48 ` [PATCH 17/17] target/hexagon: Simplify internal_mpyhh setup Richard Henderson
16 siblings, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
This structure, with bitfields, is incorrect for big-endian.
Use extract64 and deposit64 instead.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/hexagon/fma_emu.c | 46 ++++++++++++++--------------------------
1 file changed, 16 insertions(+), 30 deletions(-)
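A minimal sketch of the replacement idiom, not part of the patch, assuming extract64()/deposit64() from "qemu/bitops.h"; df_pack is a made-up name for illustration. Fields are placed at fixed bit positions of the binary64 value, independent of host endianness:

#include "qemu/bitops.h"   /* extract64(), deposit64() */

/* Illustrative only: assemble an IEEE binary64 value from its fields. */
static inline uint64_t df_pack(uint64_t sign, uint64_t exp, uint64_t mant)
{
    uint64_t ret = 0;

    ret = deposit64(ret, 0, 52, mant);   /* bits [51:0]  mantissa */
    ret = deposit64(ret, 52, 11, exp);   /* bits [62:52] exponent */
    ret = deposit64(ret, 63, 1, sign);   /* bit 63       sign     */
    return ret;
}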
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index c359eecffd..343c40a686 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -43,39 +43,29 @@
#define WAY_BIG_EXP 4096
-typedef union {
- double f;
- uint64_t i;
- struct {
- uint64_t mant:52;
- uint64_t exp:11;
- uint64_t sign:1;
- };
-} Double;
-
static uint64_t float64_getmant(float64 f64)
{
- Double a = { .i = f64 };
+ uint64_t mant = extract64(f64, 0, 52);
if (float64_is_normal(f64)) {
- return a.mant | 1ULL << 52;
+ return mant | 1ULL << 52;
}
if (float64_is_zero(f64)) {
return 0;
}
if (float64_is_denormal(f64)) {
- return a.mant;
+ return mant;
}
return ~0ULL;
}
int32_t float64_getexp(float64 f64)
{
- Double a = { .i = f64 };
+ int exp = extract64(f64, 52, 11);
if (float64_is_normal(f64)) {
- return a.exp;
+ return exp;
}
if (float64_is_denormal(f64)) {
- return a.exp + 1;
+ return exp + 1;
}
return -1;
}
@@ -346,6 +336,8 @@ float32 infinite_float32(uint8_t sign)
/* Return a maximum finite value with the requested sign */
static float64 accum_round_float64(Accum a, float_status * fp_status)
{
+ uint64_t ret;
+
if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
&& ((a.guard | a.round | a.sticky) == 0)) {
/* result zero */
@@ -453,22 +445,16 @@ static float64 accum_round_float64(Accum a, float_status * fp_status)
}
}
/* Underflow? */
- if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
+ ret = int128_getlo(a.mant);
+ if (ret & (1ULL << DF_MANTBITS)) {
/* Leading one means: No, we're normal. So, we should be done... */
- Double ret;
- ret.i = 0;
- ret.sign = a.sign;
- ret.exp = a.exp;
- ret.mant = int128_getlo(a.mant);
- return ret.i;
+ ret = deposit64(ret, 52, 11, a.exp);
+ } else {
+ assert(a.exp == 1);
+ ret = deposit64(ret, 52, 11, 0);
}
- assert(a.exp == 1);
- Double ret;
- ret.i = 0;
- ret.sign = a.sign;
- ret.exp = 0;
- ret.mant = int128_getlo(a.mant);
- return ret.i;
+ ret = deposit64(ret, 63, 1, a.sign);
+ return ret;
}
float64 internal_mpyhh(float64 a, float64 b,
--
2.43.0
* Re: [PATCH 15/17] target/hexagon: Remove Double
2024-12-08 22:48 ` [PATCH 15/17] target/hexagon: Remove Double Richard Henderson
@ 2024-12-10 21:54 ` Brian Cain
0 siblings, 0 replies; 33+ messages in thread
From: Brian Cain @ 2024-12-10 21:54 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell, mark.cave-ayland, Brian Cain
On 12/8/2024 4:48 PM, Richard Henderson wrote:
> This structure, with bitfields, is incorrect for big-endian.
> Use extract64 and deposit64 instead.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/hexagon/fma_emu.c | 46 ++++++++++++++--------------------------
> 1 file changed, 16 insertions(+), 30 deletions(-)
>
> diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
> index c359eecffd..343c40a686 100644
> --- a/target/hexagon/fma_emu.c
> +++ b/target/hexagon/fma_emu.c
> @@ -43,39 +43,29 @@
>
> #define WAY_BIG_EXP 4096
>
> -typedef union {
> - double f;
> - uint64_t i;
> - struct {
> - uint64_t mant:52;
> - uint64_t exp:11;
> - uint64_t sign:1;
> - };
> -} Double;
> -
> static uint64_t float64_getmant(float64 f64)
> {
> - Double a = { .i = f64 };
> + uint64_t mant = extract64(f64, 0, 52);
> if (float64_is_normal(f64)) {
> - return a.mant | 1ULL << 52;
> + return mant | 1ULL << 52;
> }
> if (float64_is_zero(f64)) {
> return 0;
> }
> if (float64_is_denormal(f64)) {
> - return a.mant;
> + return mant;
> }
> return ~0ULL;
> }
>
> int32_t float64_getexp(float64 f64)
> {
> - Double a = { .i = f64 };
> + int exp = extract64(f64, 52, 11);
> if (float64_is_normal(f64)) {
> - return a.exp;
> + return exp;
> }
> if (float64_is_denormal(f64)) {
> - return a.exp + 1;
> + return exp + 1;
> }
> return -1;
> }
> @@ -346,6 +336,8 @@ float32 infinite_float32(uint8_t sign)
> /* Return a maximum finite value with the requested sign */
> static float64 accum_round_float64(Accum a, float_status * fp_status)
> {
> + uint64_t ret;
> +
> if ((int128_gethi(a.mant) == 0) && (int128_getlo(a.mant) == 0)
> && ((a.guard | a.round | a.sticky) == 0)) {
> /* result zero */
> @@ -453,22 +445,16 @@ static float64 accum_round_float64(Accum a, float_status * fp_status)
> }
> }
> /* Underflow? */
> - if (int128_getlo(a.mant) & (1ULL << DF_MANTBITS)) {
> + ret = int128_getlo(a.mant);
> + if (ret & (1ULL << DF_MANTBITS)) {
> /* Leading one means: No, we're normal. So, we should be done... */
> - Double ret;
> - ret.i = 0;
> - ret.sign = a.sign;
> - ret.exp = a.exp;
> - ret.mant = int128_getlo(a.mant);
> - return ret.i;
> + ret = deposit64(ret, 52, 11, a.exp);
> + } else {
> + assert(a.exp == 1);
> + ret = deposit64(ret, 52, 11, 0);
> }
> - assert(a.exp == 1);
> - Double ret;
> - ret.i = 0;
> - ret.sign = a.sign;
> - ret.exp = 0;
> - ret.mant = int128_getlo(a.mant);
> - return ret.i;
> + ret = deposit64(ret, 63, 1, a.sign);
> + return ret;
> }
>
> float64 internal_mpyhh(float64 a, float64 b,
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
* [PATCH 16/17] target/hexagon: Use mulu64 for int128_mul_6464
2024-12-08 22:48 [RFC PATCH 00/17] softfloat, hexagon: Cleanup fmaf Richard Henderson
` (14 preceding siblings ...)
2024-12-08 22:48 ` [PATCH 15/17] target/hexagon: Remove Double Richard Henderson
@ 2024-12-08 22:48 ` Richard Henderson
2024-12-10 2:06 ` Brian Cain
2024-12-08 22:48 ` [PATCH 17/17] target/hexagon: Simplify internal_mpyhh setup Richard Henderson
16 siblings, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
No need to open-code 64x64->128-bit multiplication.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/hexagon/fma_emu.c | 32 +++-----------------------------
1 file changed, 3 insertions(+), 29 deletions(-)
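A minimal usage sketch, not part of the patch, assuming mulu64() from "qemu/host-utils.h" and the Int128 helpers from "qemu/int128.h"; mul_u64_u64 is a made-up wrapper name. mulu64() yields the low and high halves of the full product, which int128_make128() then packs:

#include "qemu/host-utils.h"   /* mulu64() */
#include "qemu/int128.h"       /* Int128, int128_make128() */

/* Illustrative only: full 64x64 -> 128-bit product, no manual carries. */
static Int128 mul_u64_u64(uint64_t a, uint64_t b)
{
    uint64_t lo, hi;

    mulu64(&lo, &hi, a, b);          /* hi:lo = a * b              */
    return int128_make128(lo, hi);   /* low half first, then high  */
}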
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index 343c40a686..6749538c09 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -82,38 +82,12 @@ int32_t float32_getexp(float32 f32)
return -1;
}
-static uint32_t int128_getw0(Int128 x)
-{
- return int128_getlo(x);
-}
-
-static uint32_t int128_getw1(Int128 x)
-{
- return int128_getlo(x) >> 32;
-}
-
static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
{
- Int128 a, b;
- uint64_t pp0, pp1a, pp1b, pp1s, pp2;
+ uint64_t l, h;
- a = int128_make64(ai);
- b = int128_make64(bi);
- pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
- pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
- pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
- pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
-
- pp1s = pp1a + pp1b;
- if ((pp1s < pp1a) || (pp1s < pp1b)) {
- pp2 += (1ULL << 32);
- }
- uint64_t ret_low = pp0 + (pp1s << 32);
- if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
- pp2 += 1;
- }
-
- return int128_make128(ret_low, pp2 + (pp1s >> 32));
+ mulu64(&l, &h, ai, bi);
+ return int128_make128(l, h);
}
static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
--
2.43.0
* Re: [PATCH 16/17] target/hexagon: Use mulu64 for int128_mul_6464
2024-12-08 22:48 ` [PATCH 16/17] target/hexagon: Use mulu64 for int128_mul_6464 Richard Henderson
@ 2024-12-10 2:06 ` Brian Cain
0 siblings, 0 replies; 33+ messages in thread
From: Brian Cain @ 2024-12-10 2:06 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell, mark.cave-ayland, Brian Cain
On 12/8/2024 4:48 PM, Richard Henderson wrote:
> No need to open-code 64x64->128-bit multiplication.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/hexagon/fma_emu.c | 32 +++-----------------------------
> 1 file changed, 3 insertions(+), 29 deletions(-)
>
> diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
> index 343c40a686..6749538c09 100644
> --- a/target/hexagon/fma_emu.c
> +++ b/target/hexagon/fma_emu.c
> @@ -82,38 +82,12 @@ int32_t float32_getexp(float32 f32)
> return -1;
> }
>
> -static uint32_t int128_getw0(Int128 x)
> -{
> - return int128_getlo(x);
> -}
> -
> -static uint32_t int128_getw1(Int128 x)
> -{
> - return int128_getlo(x) >> 32;
> -}
> -
> static Int128 int128_mul_6464(uint64_t ai, uint64_t bi)
> {
> - Int128 a, b;
> - uint64_t pp0, pp1a, pp1b, pp1s, pp2;
> + uint64_t l, h;
>
> - a = int128_make64(ai);
> - b = int128_make64(bi);
> - pp0 = (uint64_t)int128_getw0(a) * (uint64_t)int128_getw0(b);
> - pp1a = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw0(b);
> - pp1b = (uint64_t)int128_getw1(b) * (uint64_t)int128_getw0(a);
> - pp2 = (uint64_t)int128_getw1(a) * (uint64_t)int128_getw1(b);
> -
> - pp1s = pp1a + pp1b;
> - if ((pp1s < pp1a) || (pp1s < pp1b)) {
> - pp2 += (1ULL << 32);
> - }
> - uint64_t ret_low = pp0 + (pp1s << 32);
> - if ((ret_low < pp0) || (ret_low < (pp1s << 32))) {
> - pp2 += 1;
> - }
> -
> - return int128_make128(ret_low, pp2 + (pp1s >> 32));
> + mulu64(&l, &h, ai, bi);
> + return int128_make128(l, h);
> }
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>
>
> static Int128 int128_sub_borrow(Int128 a, Int128 b, int borrow)
* [PATCH 17/17] target/hexagon: Simplify internal_mpyhh setup
2024-12-08 22:48 [RFC PATCH 00/17] softfloat, hexagon: Cleanup fmaf Richard Henderson
` (15 preceding siblings ...)
2024-12-08 22:48 ` [PATCH 16/17] target/hexagon: Use mulu64 for int128_mul_6464 Richard Henderson
@ 2024-12-08 22:48 ` Richard Henderson
2024-12-10 2:02 ` Brian Cain
16 siblings, 1 reply; 33+ messages in thread
From: Richard Henderson @ 2024-12-08 22:48 UTC (permalink / raw)
To: qemu-devel; +Cc: bcain, peter.maydell, mark.cave-ayland
Initialize x with accumulated via direct assignment,
rather than multiplying by 1.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/hexagon/fma_emu.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
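A minimal sketch of the equivalence, not part of the patch: with the file-local int128_mul_6464() and the Int128 helpers from "qemu/int128.h", multiplying by 1 and widening directly give the same value; the direct form simply skips the multiply. check_equivalence is a made-up name for illustration.

#include <assert.h>
#include "qemu/int128.h"   /* Int128, int128_make64(), int128_eq() */

/* Illustrative only: both initializations produce the same 128-bit value. */
static void check_equivalence(uint64_t accumulated)
{
    Int128 by_mul  = int128_mul_6464(accumulated, 1);   /* old form */
    Int128 by_make = int128_make64(accumulated);        /* new form */

    assert(int128_eq(by_mul, by_make));
}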
diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
index 6749538c09..bc6e742639 100644
--- a/target/hexagon/fma_emu.c
+++ b/target/hexagon/fma_emu.c
@@ -453,7 +453,7 @@ float64 internal_mpyhh(float64 a, float64 b,
float64_is_infinity(b)) {
return float64_mul(a, b, fp_status);
}
- x.mant = int128_mul_6464(accumulated, 1);
+ x.mant = int128_make64(accumulated);
x.sticky = sticky;
prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
--
2.43.0
* Re: [PATCH 17/17] target/hexagon: Simplify internal_mpyhh setup
2024-12-08 22:48 ` [PATCH 17/17] target/hexagon: Simplify internal_mpyhh setup Richard Henderson
@ 2024-12-10 2:02 ` Brian Cain
0 siblings, 0 replies; 33+ messages in thread
From: Brian Cain @ 2024-12-10 2:02 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: peter.maydell, mark.cave-ayland, Brian Cain
On 12/8/2024 4:48 PM, Richard Henderson wrote:
> Initialize x with accumulated via direct assignment,
> rather than multiplying by 1.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/hexagon/fma_emu.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/target/hexagon/fma_emu.c b/target/hexagon/fma_emu.c
> index 6749538c09..bc6e742639 100644
> --- a/target/hexagon/fma_emu.c
> +++ b/target/hexagon/fma_emu.c
> @@ -453,7 +453,7 @@ float64 internal_mpyhh(float64 a, float64 b,
> float64_is_infinity(b)) {
> return float64_mul(a, b, fp_status);
> }
> - x.mant = int128_mul_6464(accumulated, 1);
> + x.mant = int128_make64(accumulated);
> x.sticky = sticky;
> prod = fGETUWORD(1, float64_getmant(a)) * fGETUWORD(1, float64_getmant(b));
> x.mant = int128_add(x.mant, int128_mul_6464(prod, 0x100000000ULL));
Reviewed-by: Brian Cain <brian.cain@oss.qualcomm.com>