qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org, "Alex Bennée" <alex.bennee@linaro.org>
Subject: [PULL 35/46] softfloat: Implement float128_add/sub via parts
Date: Sun, 16 May 2021 07:34:20 -0500	[thread overview]
Message-ID: <20210516123431.718318-36-richard.henderson@linaro.org> (raw)
In-Reply-To: <20210516123431.718318-1-richard.henderson@linaro.org>

Replace the existing Berkeley implementation with the
FloatParts implementation.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 fpu/softfloat.c | 257 +++++++-----------------------------------------
 1 file changed, 36 insertions(+), 221 deletions(-)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index afeef00097..8f734f6020 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1046,6 +1046,20 @@ static float64 float64_round_pack_canonical(FloatParts64 *p,
     return float64_pack_raw(p);
 }
 
+static void float128_unpack_canonical(FloatParts128 *p, float128 f,
+                                      float_status *s)
+{
+    float128_unpack_raw(p, f);
+    parts_canonicalize(p, s, &float128_params);
+}
+
+static float128 float128_round_pack_canonical(FloatParts128 *p,
+                                              float_status *s)
+{
+    parts_uncanon(p, s, &float128_params);
+    return float128_pack_raw(p);
+}
+
 /*
  * Addition and subtraction
  */
@@ -1213,6 +1227,28 @@ bfloat16 bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
     return bfloat16_addsub(a, b, status, true);
 }
 
+static float128 QEMU_FLATTEN
+float128_addsub(float128 a, float128 b, float_status *status, bool subtract)
+{
+    FloatParts128 pa, pb, *pr;
+
+    float128_unpack_canonical(&pa, a, status);
+    float128_unpack_canonical(&pb, b, status);
+    pr = parts_addsub(&pa, &pb, status, subtract);
+
+    return float128_round_pack_canonical(pr, status);
+}
+
+float128 float128_add(float128 a, float128 b, float_status *status)
+{
+    return float128_addsub(a, b, status, false);
+}
+
+float128 float128_sub(float128 a, float128 b, float_status *status)
+{
+    return float128_addsub(a, b, status, true);
+}
+
 /*
  * Returns the result of multiplying the floating-point values `a' and
  * `b'. The operation is performed according to the IEC/IEEE Standard
@@ -7032,227 +7068,6 @@ float128 float128_round_to_int(float128 a, float_status *status)
 
 }
 
-/*----------------------------------------------------------------------------
-| Returns the result of adding the absolute values of the quadruple-precision
-| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
-| before being returned.  `zSign' is ignored if the result is a NaN.
-| The addition is performed according to the IEC/IEEE Standard for Binary
-| Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float128 addFloat128Sigs(float128 a, float128 b, bool zSign,
-                                float_status *status)
-{
-    int32_t aExp, bExp, zExp;
-    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
-    int32_t expDiff;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    bSig1 = extractFloat128Frac1( b );
-    bSig0 = extractFloat128Frac0( b );
-    bExp = extractFloat128Exp( b );
-    expDiff = aExp - bExp;
-    if ( 0 < expDiff ) {
-        if ( aExp == 0x7FFF ) {
-            if (aSig0 | aSig1) {
-                return propagateFloat128NaN(a, b, status);
-            }
-            return a;
-        }
-        if ( bExp == 0 ) {
-            --expDiff;
-        }
-        else {
-            bSig0 |= UINT64_C(0x0001000000000000);
-        }
-        shift128ExtraRightJamming(
-            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
-        zExp = aExp;
-    }
-    else if ( expDiff < 0 ) {
-        if ( bExp == 0x7FFF ) {
-            if (bSig0 | bSig1) {
-                return propagateFloat128NaN(a, b, status);
-            }
-            return packFloat128( zSign, 0x7FFF, 0, 0 );
-        }
-        if ( aExp == 0 ) {
-            ++expDiff;
-        }
-        else {
-            aSig0 |= UINT64_C(0x0001000000000000);
-        }
-        shift128ExtraRightJamming(
-            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
-        zExp = bExp;
-    }
-    else {
-        if ( aExp == 0x7FFF ) {
-            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
-                return propagateFloat128NaN(a, b, status);
-            }
-            return a;
-        }
-        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
-        if ( aExp == 0 ) {
-            if (status->flush_to_zero) {
-                if (zSig0 | zSig1) {
-                    float_raise(float_flag_output_denormal, status);
-                }
-                return packFloat128(zSign, 0, 0, 0);
-            }
-            return packFloat128( zSign, 0, zSig0, zSig1 );
-        }
-        zSig2 = 0;
-        zSig0 |= UINT64_C(0x0002000000000000);
-        zExp = aExp;
-        goto shiftRight1;
-    }
-    aSig0 |= UINT64_C(0x0001000000000000);
-    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
-    --zExp;
-    if ( zSig0 < UINT64_C(0x0002000000000000) ) goto roundAndPack;
-    ++zExp;
- shiftRight1:
-    shift128ExtraRightJamming(
-        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
- roundAndPack:
-    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the absolute values of the quadruple-
-| precision floating-point values `a' and `b'.  If `zSign' is 1, the
-| difference is negated before being returned.  `zSign' is ignored if the
-| result is a NaN.  The subtraction is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-static float128 subFloat128Sigs(float128 a, float128 b, bool zSign,
-                                float_status *status)
-{
-    int32_t aExp, bExp, zExp;
-    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
-    int32_t expDiff;
-
-    aSig1 = extractFloat128Frac1( a );
-    aSig0 = extractFloat128Frac0( a );
-    aExp = extractFloat128Exp( a );
-    bSig1 = extractFloat128Frac1( b );
-    bSig0 = extractFloat128Frac0( b );
-    bExp = extractFloat128Exp( b );
-    expDiff = aExp - bExp;
-    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
-    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
-    if ( 0 < expDiff ) goto aExpBigger;
-    if ( expDiff < 0 ) goto bExpBigger;
-    if ( aExp == 0x7FFF ) {
-        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
-            return propagateFloat128NaN(a, b, status);
-        }
-        float_raise(float_flag_invalid, status);
-        return float128_default_nan(status);
-    }
-    if ( aExp == 0 ) {
-        aExp = 1;
-        bExp = 1;
-    }
-    if ( bSig0 < aSig0 ) goto aBigger;
-    if ( aSig0 < bSig0 ) goto bBigger;
-    if ( bSig1 < aSig1 ) goto aBigger;
-    if ( aSig1 < bSig1 ) goto bBigger;
-    return packFloat128(status->float_rounding_mode == float_round_down,
-                        0, 0, 0);
- bExpBigger:
-    if ( bExp == 0x7FFF ) {
-        if (bSig0 | bSig1) {
-            return propagateFloat128NaN(a, b, status);
-        }
-        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
-    }
-    if ( aExp == 0 ) {
-        ++expDiff;
-    }
-    else {
-        aSig0 |= UINT64_C(0x4000000000000000);
-    }
-    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
-    bSig0 |= UINT64_C(0x4000000000000000);
- bBigger:
-    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
-    zExp = bExp;
-    zSign ^= 1;
-    goto normalizeRoundAndPack;
- aExpBigger:
-    if ( aExp == 0x7FFF ) {
-        if (aSig0 | aSig1) {
-            return propagateFloat128NaN(a, b, status);
-        }
-        return a;
-    }
-    if ( bExp == 0 ) {
-        --expDiff;
-    }
-    else {
-        bSig0 |= UINT64_C(0x4000000000000000);
-    }
-    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
-    aSig0 |= UINT64_C(0x4000000000000000);
- aBigger:
-    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
-    zExp = aExp;
- normalizeRoundAndPack:
-    --zExp;
-    return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1,
-                                         status);
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of adding the quadruple-precision floating-point values
-| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
-| for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_add(float128 a, float128 b, float_status *status)
-{
-    bool aSign, bSign;
-
-    aSign = extractFloat128Sign( a );
-    bSign = extractFloat128Sign( b );
-    if ( aSign == bSign ) {
-        return addFloat128Sigs(a, b, aSign, status);
-    }
-    else {
-        return subFloat128Sigs(a, b, aSign, status);
-    }
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of subtracting the quadruple-precision floating-point
-| values `a' and `b'.  The operation is performed according to the IEC/IEEE
-| Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float128 float128_sub(float128 a, float128 b, float_status *status)
-{
-    bool aSign, bSign;
-
-    aSign = extractFloat128Sign( a );
-    bSign = extractFloat128Sign( b );
-    if ( aSign == bSign ) {
-        return subFloat128Sigs(a, b, aSign, status);
-    }
-    else {
-        return addFloat128Sigs(a, b, aSign, status);
-    }
-
-}
-
 /*----------------------------------------------------------------------------
 | Returns the result of multiplying the quadruple-precision floating-point
 | values `a' and `b'.  The operation is performed according to the IEC/IEEE
-- 
2.25.1



  parent reply	other threads:[~2021-05-16 12:49 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-16 12:33 [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) Richard Henderson
2021-05-16 12:33 ` [PULL 01/46] qemu/host-utils: Use __builtin_bitreverseN Richard Henderson
2021-05-16 12:33 ` [PULL 02/46] qemu/host-utils: Add wrappers for overflow builtins Richard Henderson
2021-05-16 12:33 ` [PULL 03/46] qemu/host-utils: Add wrappers for carry builtins Richard Henderson
2021-05-16 12:33 ` [PULL 04/46] accel/tcg: Use add/sub overflow routines in tcg-runtime-gvec.c Richard Henderson
2021-05-16 12:33 ` [PULL 05/46] tests/fp: add quad support to the benchmark utility Richard Henderson
2021-05-16 12:33 ` [PULL 06/46] softfloat: Move the binary point to the msb Richard Henderson
2021-05-16 12:33 ` [PULL 07/46] softfloat: Inline float_raise Richard Henderson
2021-05-16 12:33 ` [PULL 08/46] softfloat: Use float_raise in more places Richard Henderson
2021-05-16 12:33 ` [PULL 09/46] softfloat: Tidy a * b + inf return Richard Henderson
2021-05-16 12:33 ` [PULL 10/46] softfloat: Add float_cmask and constants Richard Henderson
2021-05-16 12:33 ` [PULL 11/46] softfloat: Use return_nan in float_to_float Richard Henderson
2021-05-16 12:33 ` [PULL 12/46] softfloat: fix return_nan vs default_nan_mode Richard Henderson
2021-05-16 12:33 ` [PULL 13/46] target/mips: Set set_default_nan_mode with set_snan_bit_is_one Richard Henderson
2021-05-16 12:33 ` [PULL 14/46] softfloat: Do not produce a default_nan from parts_silence_nan Richard Henderson
2021-05-16 12:34 ` [PULL 15/46] softfloat: Rename FloatParts to FloatParts64 Richard Henderson
2021-05-16 12:34 ` [PULL 16/46] softfloat: Move type-specific pack/unpack routines Richard Henderson
2021-05-16 12:34 ` [PULL 17/46] softfloat: Use pointers with parts_default_nan Richard Henderson
2021-05-16 12:34 ` [PULL 18/46] softfloat: Use pointers with unpack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 19/46] softfloat: Use pointers with ftype_unpack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 20/46] softfloat: Use pointers with pack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 21/46] softfloat: Use pointers with ftype_pack_raw Richard Henderson
2021-05-16 12:34 ` [PULL 22/46] softfloat: Use pointers with ftype_unpack_canonical Richard Henderson
2021-05-16 12:34 ` [PULL 23/46] softfloat: Use pointers with ftype_round_pack_canonical Richard Henderson
2021-05-16 12:34 ` [PULL 24/46] softfloat: Use pointers with parts_silence_nan Richard Henderson
2021-05-16 12:34 ` [PULL 25/46] softfloat: Rearrange FloatParts64 Richard Henderson
2021-05-16 12:34 ` [PULL 26/46] softfloat: Convert float128_silence_nan to parts Richard Henderson
2021-05-16 12:34 ` [PULL 27/46] softfloat: Convert float128_default_nan " Richard Henderson
2021-05-16 12:34 ` [PULL 28/46] softfloat: Move return_nan to softfloat-parts.c.inc Richard Henderson
2021-05-16 12:34 ` [PULL 29/46] softfloat: Move pick_nan " Richard Henderson
2021-05-16 12:34 ` [PULL 30/46] softfloat: Move pick_nan_muladd " Richard Henderson
2021-05-16 12:34 ` [PULL 31/46] softfloat: Move sf_canonicalize " Richard Henderson
2021-05-16 12:34 ` [PULL 32/46] softfloat: Move round_canonical " Richard Henderson
2021-05-16 12:34 ` [PULL 33/46] softfloat: Use uadd64_carry, usub64_borrow in softfloat-macros.h Richard Henderson
2021-05-16 12:34 ` [PULL 34/46] softfloat: Move addsub_floats to softfloat-parts.c.inc Richard Henderson
2021-05-16 12:34 ` Richard Henderson [this message]
2021-05-16 12:34 ` [PULL 36/46] softfloat: Move mul_floats " Richard Henderson
2021-05-16 12:34 ` [PULL 37/46] softfloat: Move muladd_floats " Richard Henderson
2021-05-16 12:34 ` [PULL 38/46] softfloat: Use mulu64 for mul64To128 Richard Henderson
2021-05-16 12:34 ` [PULL 39/46] softfloat: Use add192 in mul128To256 Richard Henderson
2021-05-16 12:34 ` [PULL 40/46] softfloat: Tidy mul128By64To192 Richard Henderson
2021-05-16 12:34 ` [PULL 41/46] softfloat: Introduce sh[lr]_double primitives Richard Henderson
2021-05-16 12:34 ` [PULL 42/46] softfloat: Move div_floats to softfloat-parts.c.inc Richard Henderson
2021-05-20 13:40   ` Peter Maydell
2021-05-20 16:15     ` Richard Henderson
2021-05-20 17:04       ` Peter Maydell
2021-05-16 12:34 ` [PULL 43/46] softfloat: Split float_to_float Richard Henderson
2021-05-16 12:34 ` [PULL 44/46] softfloat: Convert float-to-float conversions with float128 Richard Henderson
2021-05-16 12:34 ` [PULL 45/46] softfloat: Move round_to_int to softfloat-parts.c.inc Richard Henderson
2021-05-16 12:34 ` [PULL 46/46] softfloat: Move round_to_int_and_pack " Richard Henderson
2021-05-16 13:19 ` [PULL 00/46] softfloat: Convert float128 to FloatParts (part 1) no-reply
2021-05-18 10:11 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210516123431.718318-36-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=alex.bennee@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).