From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, "Alex Bennée" <alex.bennee@linaro.org>
Subject: [PULL 44/46] softfloat: Convert float-to-float conversions with float128
Date: Sun, 16 May 2021 07:34:29 -0500 [thread overview]
Message-ID: <20210516123431.718318-45-richard.henderson@linaro.org> (raw)
In-Reply-To: <20210516123431.718318-1-richard.henderson@linaro.org>
Introduce parts_float_to_float_widen and parts_float_to_float_narrow.
Use them for float128_to_float{32,64} and float{32,64}_to_float128.
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
fpu/softfloat.c | 203 ++++++++++++++++--------------------------------
1 file changed, 69 insertions(+), 134 deletions(-)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 80025539ef..d056b5730b 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -2092,6 +2092,35 @@ static void parts128_float_to_float(FloatParts128 *a, float_status *s)
#define parts_float_to_float(P, S) \
PARTS_GENERIC_64_128(float_to_float, P)(P, S)
+static void parts_float_to_float_narrow(FloatParts64 *a, FloatParts128 *b,
+ float_status *s)
+{
+ a->cls = b->cls;
+ a->sign = b->sign;
+ a->exp = b->exp;
+
+ if (a->cls == float_class_normal) {
+ frac_truncjam(a, b);
+ } else if (is_nan(a->cls)) {
+ /* Discard the low bits of the NaN. */
+ a->frac = b->frac_hi;
+ parts_return_nan(a, s);
+ }
+}
+
+static void parts_float_to_float_widen(FloatParts128 *a, FloatParts64 *b,
+ float_status *s)
+{
+ a->cls = b->cls;
+ a->sign = b->sign;
+ a->exp = b->exp;
+ frac_widen(a, b);
+
+ if (is_nan(a->cls)) {
+ parts_return_nan(a, s);
+ }
+}
+
float32 float16_to_float32(float16 a, bool ieee, float_status *s)
{
const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
@@ -2215,6 +2244,46 @@ bfloat16 float64_to_bfloat16(float64 a, float_status *s)
return bfloat16_round_pack_canonical(&p, s);
}
+float32 float128_to_float32(float128 a, float_status *s)
+{
+ FloatParts64 p64;
+ FloatParts128 p128;
+
+ float128_unpack_canonical(&p128, a, s);
+ parts_float_to_float_narrow(&p64, &p128, s);
+ return float32_round_pack_canonical(&p64, s);
+}
+
+float64 float128_to_float64(float128 a, float_status *s)
+{
+ FloatParts64 p64;
+ FloatParts128 p128;
+
+ float128_unpack_canonical(&p128, a, s);
+ parts_float_to_float_narrow(&p64, &p128, s);
+ return float64_round_pack_canonical(&p64, s);
+}
+
+float128 float32_to_float128(float32 a, float_status *s)
+{
+ FloatParts64 p64;
+ FloatParts128 p128;
+
+ float32_unpack_canonical(&p64, a, s);
+ parts_float_to_float_widen(&p128, &p64, s);
+ return float128_round_pack_canonical(&p128, s);
+}
+
+float128 float64_to_float128(float64 a, float_status *s)
+{
+ FloatParts64 p64;
+ FloatParts128 p128;
+
+ float64_unpack_canonical(&p64, a, s);
+ parts_float_to_float_widen(&p128, &p64, s);
+ return float128_round_pack_canonical(&p128, s);
+}
+
/*
* Rounds the floating-point value `a' to an integer, and returns the
* result as a floating-point value. The operation is performed
@@ -5175,38 +5244,6 @@ floatx80 float32_to_floatx80(float32 a, float_status *status)
}
-/*----------------------------------------------------------------------------
-| Returns the result of converting the single-precision floating-point value
-| `a' to the double-precision floating-point format. The conversion is
-| performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float32_to_float128(float32 a, float_status *status)
-{
- bool aSign;
- int aExp;
- uint32_t aSig;
-
- a = float32_squash_input_denormal(a, status);
- aSig = extractFloat32Frac( a );
- aExp = extractFloat32Exp( a );
- aSign = extractFloat32Sign( a );
- if ( aExp == 0xFF ) {
- if (aSig) {
- return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
- }
- return packFloat128( aSign, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
- normalizeFloat32Subnormal( aSig, &aExp, &aSig );
- --aExp;
- }
- return packFloat128( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<25, 0 );
-
-}
-
/*----------------------------------------------------------------------------
| Returns the remainder of the single-precision floating-point value `a'
| with respect to the corresponding value `b'. The operation is performed
@@ -5480,40 +5517,6 @@ floatx80 float64_to_floatx80(float64 a, float_status *status)
}
-/*----------------------------------------------------------------------------
-| Returns the result of converting the double-precision floating-point value
-| `a' to the quadruple-precision floating-point format. The conversion is
-| performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float64_to_float128(float64 a, float_status *status)
-{
- bool aSign;
- int aExp;
- uint64_t aSig, zSig0, zSig1;
-
- a = float64_squash_input_denormal(a, status);
- aSig = extractFloat64Frac( a );
- aExp = extractFloat64Exp( a );
- aSign = extractFloat64Sign( a );
- if ( aExp == 0x7FF ) {
- if (aSig) {
- return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
- }
- return packFloat128( aSign, 0x7FFF, 0, 0 );
- }
- if ( aExp == 0 ) {
- if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
- normalizeFloat64Subnormal( aSig, &aExp, &aSig );
- --aExp;
- }
- shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
- return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
-
-}
-
-
/*----------------------------------------------------------------------------
| Returns the remainder of the double-precision floating-point value `a'
| with respect to the corresponding value `b'. The operation is performed
@@ -6915,74 +6918,6 @@ uint32_t float128_to_uint32(float128 a, float_status *status)
return res;
}
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the single-precision floating-point format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float32 float128_to_float32(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp;
- uint64_t aSig0, aSig1;
- uint32_t zSig;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 ) {
- return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
- }
- return packFloat32( aSign, 0xFF, 0 );
- }
- aSig0 |= ( aSig1 != 0 );
- shift64RightJamming( aSig0, 18, &aSig0 );
- zSig = aSig0;
- if ( aExp || zSig ) {
- zSig |= 0x40000000;
- aExp -= 0x3F81;
- }
- return roundAndPackFloat32(aSign, aExp, zSig, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the quadruple-precision floating-point
-| value `a' to the double-precision floating-point format. The conversion
-| is performed according to the IEC/IEEE Standard for Binary Floating-Point
-| Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float64 float128_to_float64(float128 a, float_status *status)
-{
- bool aSign;
- int32_t aExp;
- uint64_t aSig0, aSig1;
-
- aSig1 = extractFloat128Frac1( a );
- aSig0 = extractFloat128Frac0( a );
- aExp = extractFloat128Exp( a );
- aSign = extractFloat128Sign( a );
- if ( aExp == 0x7FFF ) {
- if ( aSig0 | aSig1 ) {
- return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
- }
- return packFloat64( aSign, 0x7FF, 0 );
- }
- shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
- aSig0 |= ( aSig1 != 0 );
- if ( aExp || aSig0 ) {
- aSig0 |= UINT64_C(0x4000000000000000);
- aExp -= 0x3C01;
- }
- return roundAndPackFloat64(aSign, aExp, aSig0, status);
-
-}
-
/*----------------------------------------------------------------------------
| Returns the result of converting the quadruple-precision floating-point
| value `a' to the extended double-precision floating-point format. The
--
2.25.1
next prev parent reply other threads:[~2021-05-16 12:59 UTC|newest]
Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
2021-05-16 12:33 ` [PULL 01/46] qemu/host-utils: Use __builtin_bitreverseN Richard Henderson
2021-05-16 12:33 ` [PULL 02/46] qemu/host-utils: Add wrappers for overflow builtins Richard Henderson
2021-05-16 12:33 ` [PULL 03/46] qemu/host-utils: Add wrappers for carry builtins Richard Henderson
2021-05-16 12:33 ` [PULL 04/46] accel/tcg: Use add/sub overflow routines in tcg-runtime-gvec.c Richard Henderson
2021-05-16 12:33 ` [PULL 05/46] tests/fp: add quad support to the benchmark utility Richard Henderson
2021-05-16 12:33 ` [PULL 06/46] softfloat: Move the binary point to the msb Richard Henderson
2021-05-16 12:33 ` [PULL 07/46] softfloat: Inline float_raise Richard Henderson
2021-05-16 12:33 ` [PULL 08/46] softfloat: Use float_raise in more places Richard Henderson
2021-05-16 12:33 ` [PULL 09/46] softfloat: Tidy a * b + inf return Richard Henderson
2021-05-16 12:33 ` [PULL 10/46] softfloat: Add float_cmask and constants Richard Henderson
2021-05-16 12:33 ` [PULL 11/46] softfloat: Use return_nan in float_to_float Richard Henderson
2021-05-16 12:33 ` [PULL 12/46] softfloat: fix return_nan vs default_nan_mode Richard Henderson
2021-05-16 12:33 ` [PULL 13/46] target/mips: Set set_default_nan_mode with set_snan_bit_is_one Richard Henderson
2021-05-16 12:33 ` [PULL 14/46] softfloat: Do not produce a default_nan from parts_silence_nan Richard Henderson
2021-05-16 12:34 ` [PULL 15/46] softfloat: Rename FloatParts to FloatParts64 Richard Henderson
2021-05-16 12:34 ` [PULL 16/46] softfloat: Move type-specific pack/unpack routines Richard Henderson
2021-05-16 12:34 ` [PULL 17/46] softfloat: Use pointers with parts_default_nan Richard Henderson
2021-05-16 12:34 ` [PULL 18/46] softfloat: Use pointers with unpack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 19/46] softfloat: Use pointers with ftype_unpack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 20/46] softfloat: Use pointers with pack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 21/46] softfloat: Use pointers with ftype_pack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 22/46] softfloat: Use pointers with ftype_unpack_canonical Richard Henderson
2021-05-16 12:34 ` [PULL 23/46] softfloat: Use pointers with ftype_round_pack_canonical Richard Henderson
2021-05-16 12:34 ` [PULL 24/46] softfloat: Use pointers with parts_silence_nan Richard Henderson
2021-05-16 12:34 ` [PULL 25/46] softfloat: Rearrange FloatParts64 Richard Henderson
2021-05-16 12:34 ` [PULL 26/46] softfloat: Convert float128_silence_nan to parts Richard Henderson
2021-05-16 12:34 ` [PULL 27/46] softfloat: Convert float128_default_nan " Richard Henderson
2021-05-16 12:34 ` [PULL 28/46] softfloat: Move return_nan to softfloat-parts.c.inc Richard Henderson
2021-05-16 12:34 ` [PULL 29/46] softfloat: Move pick_nan " Richard Henderson
2021-05-16 12:34 ` [PULL 30/46] softfloat: Move pick_nan_muladd " Richard Henderson
2021-05-16 12:34 ` [PULL 31/46] softfloat: Move sf_canonicalize " Richard Henderson
2021-05-16 12:34 ` [PULL 32/46] softfloat: Move round_canonical " Richard Henderson
2021-05-16 12:34 ` [PULL 33/46] softfloat: Use uadd64_carry, usub64_borrow in softfloat-macros.h Richard Henderson
2021-05-16 12:34 ` [PULL 34/46] softfloat: Move addsub_floats to softfloat-parts.c.inc Richard Henderson
2021-05-16 12:34 ` [PULL 35/46] softfloat: Implement float128_add/sub via parts Richard Henderson
2021-05-16 12:34 ` [PULL 36/46] softfloat: Move mul_floats to softfloat-parts.c.inc Richard Henderson
2021-05-16 12:34 ` [PULL 37/46] softfloat: Move muladd_floats " Richard Henderson
2021-05-16 12:34 ` [PULL 38/46] softfloat: Use mulu64 for mul64To128 Richard Henderson
2021-05-16 12:34 ` [PULL 39/46] softfloat: Use add192 in mul128To256 Richard Henderson
2021-05-16 12:34 ` [PULL 40/46] softfloat: Tidy mul128By64To192 Richard Henderson
2021-05-16 12:34 ` [PULL 41/46] softfloat: Introduce sh[lr]_double primitives Richard Henderson
2021-05-16 12:34 ` [PULL 42/46] softfloat: Move div_floats to softfloat-parts.c.inc Richard Henderson
2021-05-20 13:40 ` Peter Maydell
2021-05-20 16:15 ` Richard Henderson
2021-05-20 17:04 ` Peter Maydell
2021-05-16 12:34 ` [PULL 43/46] softfloat: Split float_to_float Richard Henderson
2021-05-16 12:34 ` Richard Henderson [this message]
2021-05-16 12:34 ` [PULL 45/46] softfloat: Move round_to_int to softfloat-parts.c.inc Richard Henderson
2021-05-16 12:34 ` [PULL 46/46] softfloat: Move round_to_int_and_pack " Richard Henderson
2021-05-16 13:19 ` [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) no-reply
2021-05-18 10:11 ` Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210516123431.718318-45-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=alex.bennee@linaro.org \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).