[Qemu-devel] [PATCH v4 18/22] fpu/softfloat: re-factor int/uint to float

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: "Alex Bennée" <alex.bennee@linaro.org>
To: richard.henderson@linaro.org, peter.maydell@linaro.org,
	laurent@vivier.eu, bharata@linux.vnet.ibm.com,
	andrew@andrewdutcher.com
Cc: qemu-devel@nongnu.org, "Alex Bennée" <alex.bennee@linaro.org>,
	"Aurelien Jarno" <aurelien@aurel32.net>
Subject: [Qemu-devel] [PATCH v4 18/22] fpu/softfloat: re-factor int/uint to float
Date: Tue,  6 Feb 2018 16:48:11 +0000	[thread overview]
Message-ID: <20180206164815.10084-19-alex.bennee@linaro.org> (raw)
In-Reply-To: <20180206164815.10084-1-alex.bennee@linaro.org>

These conversions are considerably simpler, as the narrower integer
types can just use the 64-bit conversion function. Because the
decomposed fractional part is a full 64 bits wide, rounding and
inexact handling come from the pack functions.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
v2
  - explicit setting of r.sign
v3
  - renaming of functions/structs
---
 fpu/softfloat.c         | 322 ++++++++++++++++++++++++------------------------
 include/fpu/softfloat.h |  30 ++---
 2 files changed, 172 insertions(+), 180 deletions(-)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 216d60df6e..9f9f101d35 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1501,6 +1501,169 @@ FLOAT_TO_UINT(64, 64)
 
 #undef FLOAT_TO_UINT
 
+/*
+ * Integer to float conversions
+ *
+ * Returns the result of converting the two's complement integer `a'
+ * to the floating-point format. The conversion is performed according
+ * to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+ */
+
+static FloatParts int_to_float(int64_t a, float_status *status)
+{
+    FloatParts r;
+    if (a == 0) {
+        r.cls = float_class_zero;
+        r.sign = false;
+    } else if (a == (1ULL << 63)) {
+        r.cls = float_class_normal;
+        r.sign = true;
+        r.frac = DECOMPOSED_IMPLICIT_BIT;
+        r.exp = 63;
+    } else {
+        uint64_t f;
+        if (a < 0) {
+            f = -a;
+            r.sign = true;
+        } else {
+            f = a;
+            r.sign = false;
+        }
+        int shift = clz64(f) - 1;
+        r.cls = float_class_normal;
+        r.exp = (DECOMPOSED_BINARY_POINT - shift);
+        r.frac = f << shift;
+    }
+
+    return r;
+}
+
+float16 int64_to_float16(int64_t a, float_status *status)
+{
+    FloatParts pa = int_to_float(a, status);
+    return float16_round_pack_canonical(pa, status);
+}
+
+float16 int32_to_float16(int32_t a, float_status *status)
+{
+    return int64_to_float16(a, status);
+}
+
+float16 int16_to_float16(int16_t a, float_status *status)
+{
+    return int64_to_float16(a, status);
+}
+
+float32 int64_to_float32(int64_t a, float_status *status)
+{
+    FloatParts pa = int_to_float(a, status);
+    return float32_round_pack_canonical(pa, status);
+}
+
+float32 int32_to_float32(int32_t a, float_status *status)
+{
+    return int64_to_float32(a, status);
+}
+
+float32 int16_to_float32(int16_t a, float_status *status)
+{
+    return int64_to_float32(a, status);
+}
+
+float64 int64_to_float64(int64_t a, float_status *status)
+{
+    FloatParts pa = int_to_float(a, status);
+    return float64_round_pack_canonical(pa, status);
+}
+
+float64 int32_to_float64(int32_t a, float_status *status)
+{
+    return int64_to_float64(a, status);
+}
+
+float64 int16_to_float64(int16_t a, float_status *status)
+{
+    return int64_to_float64(a, status);
+}
+
+
+/*
+ * Unsigned Integer to float conversions
+ *
+ * Returns the result of converting the unsigned integer `a' to the
+ * floating-point format. The conversion is performed according to the
+ * IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+ */
+
+static FloatParts uint_to_float(uint64_t a, float_status *status)
+{
+    FloatParts r = { .sign = false};
+
+    if (a == 0) {
+        r.cls = float_class_zero;
+    } else {
+        int spare_bits = clz64(a) - 1;
+        r.cls = float_class_normal;
+        r.exp = DECOMPOSED_BINARY_POINT - spare_bits;
+        if (spare_bits < 0) {
+            shift64RightJamming(a, -spare_bits, &a);
+            r.frac = a;
+        } else {
+            r.frac = a << spare_bits;
+        }
+    }
+
+    return r;
+}
+
+float16 uint64_to_float16(uint64_t a, float_status *status)
+{
+    FloatParts pa = uint_to_float(a, status);
+    return float16_round_pack_canonical(pa, status);
+}
+
+float16 uint32_to_float16(uint32_t a, float_status *status)
+{
+    return uint64_to_float16(a, status);
+}
+
+float16 uint16_to_float16(uint16_t a, float_status *status)
+{
+    return uint64_to_float16(a, status);
+}
+
+float32 uint64_to_float32(uint64_t a, float_status *status)
+{
+    FloatParts pa = uint_to_float(a, status);
+    return float32_round_pack_canonical(pa, status);
+}
+
+float32 uint32_to_float32(uint32_t a, float_status *status)
+{
+    return uint64_to_float32(a, status);
+}
+
+float32 uint16_to_float32(uint16_t a, float_status *status)
+{
+    return uint64_to_float32(a, status);
+}
+
+float64 uint64_to_float64(uint64_t a, float_status *status)
+{
+    FloatParts pa = uint_to_float(a, status);
+    return float64_round_pack_canonical(pa, status);
+}
+
+float64 uint32_to_float64(uint32_t a, float_status *status)
+{
+    return uint64_to_float64(a, status);
+}
+
+float64 uint16_to_float64(uint16_t a, float_status *status)
+{
+    return uint64_to_float64(a, status);
+}
+
 /*----------------------------------------------------------------------------
 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
 | and 7, and returns the properly rounded 32-bit integer corresponding to the
@@ -2592,43 +2755,6 @@ static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
 
 }
 
-/*----------------------------------------------------------------------------
-| Returns the result of converting the 32-bit two's complement integer `a'
-| to the single-precision floating-point format.  The conversion is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float32 int32_to_float32(int32_t a, float_status *status)
-{
-    flag zSign;
-
-    if ( a == 0 ) return float32_zero;
-    if ( a == (int32_t) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
-    zSign = ( a < 0 );
-    return normalizeRoundAndPackFloat32(zSign, 0x9C, zSign ? -a : a, status);
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the 32-bit two's complement integer `a'
-| to the double-precision floating-point format.  The conversion is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float64 int32_to_float64(int32_t a, float_status *status)
-{
-    flag zSign;
-    uint32_t absA;
-    int8_t shiftCount;
-    uint64_t zSig;
-
-    if ( a == 0 ) return float64_zero;
-    zSign = ( a < 0 );
-    absA = zSign ? - a : a;
-    shiftCount = countLeadingZeros32( absA ) + 21;
-    zSig = absA;
-    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
-
-}
 
 /*----------------------------------------------------------------------------
 | Returns the result of converting the 32-bit two's complement integer `a'
@@ -2675,56 +2801,6 @@ float128 int32_to_float128(int32_t a, float_status *status)
 
 }
 
-/*----------------------------------------------------------------------------
-| Returns the result of converting the 64-bit two's complement integer `a'
-| to the single-precision floating-point format.  The conversion is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float32 int64_to_float32(int64_t a, float_status *status)
-{
-    flag zSign;
-    uint64_t absA;
-    int8_t shiftCount;
-
-    if ( a == 0 ) return float32_zero;
-    zSign = ( a < 0 );
-    absA = zSign ? - a : a;
-    shiftCount = countLeadingZeros64( absA ) - 40;
-    if ( 0 <= shiftCount ) {
-        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
-    }
-    else {
-        shiftCount += 7;
-        if ( shiftCount < 0 ) {
-            shift64RightJamming( absA, - shiftCount, &absA );
-        }
-        else {
-            absA <<= shiftCount;
-        }
-        return roundAndPackFloat32(zSign, 0x9C - shiftCount, absA, status);
-    }
-
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the 64-bit two's complement integer `a'
-| to the double-precision floating-point format.  The conversion is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float64 int64_to_float64(int64_t a, float_status *status)
-{
-    flag zSign;
-
-    if ( a == 0 ) return float64_zero;
-    if ( a == (int64_t) LIT64( 0x8000000000000000 ) ) {
-        return packFloat64( 1, 0x43E, 0 );
-    }
-    zSign = ( a < 0 );
-    return normalizeRoundAndPackFloat64(zSign, 0x43C, zSign ? -a : a, status);
-}
-
 /*----------------------------------------------------------------------------
 | Returns the result of converting the 64-bit two's complement integer `a'
 | to the extended double-precision floating-point format.  The conversion
@@ -2779,65 +2855,6 @@ float128 int64_to_float128(int64_t a, float_status *status)
 
 }
 
-/*----------------------------------------------------------------------------
-| Returns the result of converting the 64-bit unsigned integer `a'
-| to the single-precision floating-point format.  The conversion is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float32 uint64_to_float32(uint64_t a, float_status *status)
-{
-    int shiftcount;
-
-    if (a == 0) {
-        return float32_zero;
-    }
-
-    /* Determine (left) shift needed to put first set bit into bit posn 23
-     * (since packFloat32() expects the binary point between bits 23 and 22);
-     * this is the fast case for smallish numbers.
-     */
-    shiftcount = countLeadingZeros64(a) - 40;
-    if (shiftcount >= 0) {
-        return packFloat32(0, 0x95 - shiftcount, a << shiftcount);
-    }
-    /* Otherwise we need to do a round-and-pack. roundAndPackFloat32()
-     * expects the binary point between bits 30 and 29, hence the + 7.
-     */
-    shiftcount += 7;
-    if (shiftcount < 0) {
-        shift64RightJamming(a, -shiftcount, &a);
-    } else {
-        a <<= shiftcount;
-    }
-
-    return roundAndPackFloat32(0, 0x9c - shiftcount, a, status);
-}
-
-/*----------------------------------------------------------------------------
-| Returns the result of converting the 64-bit unsigned integer `a'
-| to the double-precision floating-point format.  The conversion is performed
-| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
-*----------------------------------------------------------------------------*/
-
-float64 uint64_to_float64(uint64_t a, float_status *status)
-{
-    int exp = 0x43C;
-    int shiftcount;
-
-    if (a == 0) {
-        return float64_zero;
-    }
-
-    shiftcount = countLeadingZeros64(a) - 1;
-    if (shiftcount < 0) {
-        shift64RightJamming(a, -shiftcount, &a);
-    } else {
-        a <<= shiftcount;
-    }
-    return roundAndPackFloat64(0, exp - shiftcount, a, status);
-}
-
 /*----------------------------------------------------------------------------
 | Returns the result of converting the 64-bit unsigned integer `a'
 | to the quadruple-precision floating-point format.  The conversion is performed
@@ -6715,19 +6732,6 @@ int float128_unordered_quiet(float128 a, float128 b, float_status *status)
     return 0;
 }
 
-/* misc functions */
-float32 uint32_to_float32(uint32_t a, float_status *status)
-{
-    return int64_to_float32(a, status);
-}
-
-float64 uint32_to_float64(uint32_t a, float_status *status)
-{
-    return int64_to_float64(a, status);
-}
-
-
-
 #define COMPARE(s, nan_exp)                                                  \
 static inline int float ## s ## _compare_internal(float ## s a, float ## s b,\
                                       int is_quiet, float_status *status)    \
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index ec1e701c26..3e6fdd756a 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -190,9 +190,13 @@ enum {
 /*----------------------------------------------------------------------------
 | Software IEC/IEEE integer-to-floating-point conversion routines.
 *----------------------------------------------------------------------------*/
+float32 int16_to_float32(int16_t, float_status *status);
 float32 int32_to_float32(int32_t, float_status *status);
+float64 int16_to_float64(int16_t, float_status *status);
 float64 int32_to_float64(int32_t, float_status *status);
+float32 uint16_to_float32(uint16_t, float_status *status);
 float32 uint32_to_float32(uint32_t, float_status *status);
+float64 uint16_to_float64(uint16_t, float_status *status);
 float64 uint32_to_float64(uint32_t, float_status *status);
 floatx80 int32_to_floatx80(int32_t, float_status *status);
 float128 int32_to_float128(int32_t, float_status *status);
@@ -204,27 +208,6 @@ float32 uint64_to_float32(uint64_t, float_status *status);
 float64 uint64_to_float64(uint64_t, float_status *status);
 float128 uint64_to_float128(uint64_t, float_status *status);
 
-/* We provide the int16 versions for symmetry of API with float-to-int */
-static inline float32 int16_to_float32(int16_t v, float_status *status)
-{
-    return int32_to_float32(v, status);
-}
-
-static inline float32 uint16_to_float32(uint16_t v, float_status *status)
-{
-    return uint32_to_float32(v, status);
-}
-
-static inline float64 int16_to_float64(int16_t v, float_status *status)
-{
-    return int32_to_float64(v, status);
-}
-
-static inline float64 uint16_to_float64(uint16_t v, float_status *status)
-{
-    return uint32_to_float64(v, status);
-}
-
 /*----------------------------------------------------------------------------
 | Software half-precision conversion routines.
 *----------------------------------------------------------------------------*/
@@ -245,6 +228,11 @@ uint64_t float16_to_uint64(float16 a, float_status *status);
 int64_t float16_to_int64_round_to_zero(float16, float_status *status);
 uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *status);
 float16 int16_to_float16(int16_t a, float_status *status);
+float16 int32_to_float16(int32_t a, float_status *status);
+float16 int64_to_float16(int64_t a, float_status *status);
+float16 uint16_to_float16(uint16_t a, float_status *status);
+float16 uint32_to_float16(uint32_t a, float_status *status);
+float16 uint64_to_float16(uint64_t a, float_status *status);
 
 /*----------------------------------------------------------------------------
 | Software half-precision operations.
-- 
2.15.1

next prev parent reply	other threads:[~2018-02-06 16:55 UTC|newest]

Thread overview: 43+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-02-06 16:47 [Qemu-devel] [PATCH v4 00/22] re-factor softfloat and add fp16 functions Alex Bennée
2018-02-06 16:47 ` [Qemu-devel] [PATCH v4 01/22] fpu/softfloat: implement float16_squash_input_denormal Alex Bennée
2018-02-06 16:47 ` [Qemu-devel] [PATCH v4 02/22] include/fpu/softfloat: remove USE_SOFTFLOAT_STRUCT_TYPES Alex Bennée
2018-02-06 16:47 ` [Qemu-devel] [PATCH v4 03/22] fpu/softfloat-types: new header to prevent excessive re-builds Alex Bennée
2018-02-06 16:47 ` [Qemu-devel] [PATCH v4 04/22] target/*/cpu.h: remove softfloat.h Alex Bennée
2018-02-06 16:47 ` [Qemu-devel] [PATCH v4 05/22] include/fpu/softfloat: implement float16_abs helper Alex Bennée
2018-02-06 16:47 ` [Qemu-devel] [PATCH v4 06/22] include/fpu/softfloat: implement float16_chs helper Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 07/22] include/fpu/softfloat: implement float16_set_sign helper Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 08/22] include/fpu/softfloat: add some float16 constants Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 09/22] fpu/softfloat: improve comments on ARM NaN propagation Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 10/22] fpu/softfloat: move the extract functions to the top of the file Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 11/22] fpu/softfloat: define decompose structures Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 12/22] fpu/softfloat: re-factor add/sub Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 13/22] fpu/softfloat: re-factor mul Alex Bennée
2018-02-13 15:20   ` Peter Maydell
2018-02-13 15:39     ` Richard Henderson
2018-02-19 16:04     ` Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 14/22] fpu/softfloat: re-factor div Alex Bennée
2018-02-13 15:22   ` Peter Maydell
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 15/22] fpu/softfloat: re-factor muladd Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 16/22] fpu/softfloat: re-factor round_to_int Alex Bennée
2018-02-13 15:14   ` Peter Maydell
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 17/22] fpu/softfloat: re-factor float to int/uint Alex Bennée
2018-02-06 16:48 ` Alex Bennée [this message]
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 19/22] fpu/softfloat: re-factor scalbn Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 20/22] fpu/softfloat: re-factor minmax Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 21/22] fpu/softfloat: re-factor compare Alex Bennée
2018-02-06 16:48 ` [Qemu-devel] [PATCH v4 22/22] fpu/softfloat: re-factor sqrt Alex Bennée
2018-02-13 15:50   ` Peter Maydell
2018-02-13 16:23     ` Richard Henderson
2018-02-13 16:34       ` Peter Maydell
2018-02-20 21:01     ` [Qemu-devel] [PATCH] fpu/softfloat: use hardware sqrt if we can (EXPERIMENT!) Alex Bennée
2018-02-21 20:44       ` Alex Bennée
2018-03-21 20:16       ` Emilio G. Cota
2018-02-13 17:50   ` [Qemu-devel] [PATCH v4 22/22] fpu/softfloat: re-factor sqrt Richard Henderson
2018-02-06 17:42 ` [Qemu-devel] [PATCH v4 00/22] re-factor softfloat and add fp16 functions no-reply
2018-02-13 14:52 ` Peter Maydell
2018-02-17 13:23   ` Alex Bennée
2018-02-19 13:56     ` Peter Maydell
2018-02-22  1:23       ` Fam Zheng
2018-02-13 15:51 ` Peter Maydell
2018-02-13 17:31   ` Laurent Vivier
2018-02-20 12:43     ` Alex Bennée

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:216d60df6 dfblob:9f9f101d3 dfblob:ec1e701c2 dfblob:3e6fdd756 )
 OR (
bs:"[Qemu-devel] [PATCH v4 18/22] fpu/softfloat: re-factor int/uint to float" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180206164815.10084-19-alex.bennee@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=andrew@andrewdutcher.com \
    --cc=aurelien@aurel32.net \
    --cc=bharata@linux.vnet.ibm.com \
    --cc=laurent@vivier.eu \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).