[PATCH 0/3] Implement bfloat16 in softfloat

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 0/3] Implement bfloat16 in softfloat
@ 2020-08-13  7:14 LIU Zhiwei
  2020-08-13  7:14 ` [PATCH 1/3] fpu/softfloat: Define operations for bfloat16 LIU Zhiwei
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: LIU Zhiwei @ 2020-08-13  7:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: alex.bennee, richard.henderson, LIU Zhiwei, aurelien,
	peter.maydell

As bfloat16 is becoming increasingly popular across architectures, implement
bfloat16 interfaces in softfloat, so that architectures can add their bfloat16
instructions on top of the bfloat16 interfaces provided here.

These interfaces have been tested by RISU on RISC-V bfloat16 with
XuanTie C906.

LIU Zhiwei (3):
  fpu/softfloat: Define operations for bfloat16
  fpu/softfloat: Define convert operations for bfloat16
  fpu/softfloat: Define misc operations for bfloat16

 fpu/softfloat-specialize.inc.c |  38 ++++
 fpu/softfloat.c                | 391 +++++++++++++++++++++++++++++++++
 include/fpu/softfloat-types.h  |   5 +
 include/fpu/softfloat.h        | 133 +++++++++++
 4 files changed, 567 insertions(+)

-- 
2.23.0



^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/3] fpu/softfloat: Define operations for bfloat16
  2020-08-13  7:14 [PATCH 0/3] Implement blfoat16 in softfloat LIU Zhiwei
@ 2020-08-13  7:14 ` LIU Zhiwei
  2020-08-13  7:14 ` [PATCH 2/3] fpu/softfloat: Define convert " LIU Zhiwei
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: LIU Zhiwei @ 2020-08-13  7:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: alex.bennee, richard.henderson, LIU Zhiwei, aurelien,
	peter.maydell

This patch implements the operations for bfloat16, except for conversions and
some miscellaneous operations. It also adds the FloatFmt and pack/unpack
interfaces for bfloat16. As both are static, they cannot go into a separate patch.

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 fpu/softfloat.c               | 168 ++++++++++++++++++++++++++++++++++
 include/fpu/softfloat-types.h |   5 +
 include/fpu/softfloat.h       |  44 +++++++++
 3 files changed, 217 insertions(+)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index 79be4f5840..d4205f92d5 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -554,6 +554,10 @@ static const FloatFmt float16_params_ahp = {
     .arm_althp = true
 };
 
+static const FloatFmt bfloat16_params = {
+    FLOAT_PARAMS(8, 7)
+};
+
 static const FloatFmt float32_params = {
     FLOAT_PARAMS(8, 23)
 };
@@ -580,6 +584,11 @@ static inline FloatParts float16_unpack_raw(float16 f)
     return unpack_raw(float16_params, f);
 }
 
+static inline FloatParts bfloat16_unpack_raw(bfloat16 f)
+{
+    return unpack_raw(bfloat16_params, f);
+}
+
 static inline FloatParts float32_unpack_raw(float32 f)
 {
     return unpack_raw(float32_params, f);
@@ -603,6 +612,11 @@ static inline float16 float16_pack_raw(FloatParts p)
     return make_float16(pack_raw(float16_params, p));
 }
 
+static inline bfloat16 bfloat16_pack_raw(FloatParts p)
+{
+    return pack_raw(bfloat16_params, p);
+}
+
 static inline float32 float32_pack_raw(FloatParts p)
 {
     return make_float32(pack_raw(float32_params, p));
@@ -820,6 +834,11 @@ static FloatParts float16_unpack_canonical(float16 f, float_status *s)
     return float16a_unpack_canonical(f, s, &float16_params);
 }
 
+static FloatParts bfloat16_unpack_canonical(bfloat16 f, float_status *s)
+{
+    return sf_canonicalize(bfloat16_unpack_raw(f), &bfloat16_params, s);
+}
+
 static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
                                              const FloatFmt *params)
 {
@@ -831,6 +850,11 @@ static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
     return float16a_round_pack_canonical(p, s, &float16_params);
 }
 
+static bfloat16 bfloat16_round_pack_canonical(FloatParts p, float_status *s)
+{
+    return bfloat16_pack_raw(round_canonical(p, s, &bfloat16_params));
+}
+
 static FloatParts float32_unpack_canonical(float32 f, float_status *s)
 {
     return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
@@ -1158,6 +1182,28 @@ float64_sub(float64 a, float64 b, float_status *s)
     return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
 }
 
+/*
+ * Returns the result of adding or subtracting the bfloat16
+ * values `a' and `b'.
+ */
+bfloat16 QEMU_FLATTEN bfloat16_add(bfloat16 a, bfloat16 b, float_status *status)
+{
+    FloatParts pa = bfloat16_unpack_canonical(a, status);
+    FloatParts pb = bfloat16_unpack_canonical(b, status);
+    FloatParts pr = addsub_floats(pa, pb, false, status);
+
+    return bfloat16_round_pack_canonical(pr, status);
+}
+
+bfloat16 QEMU_FLATTEN bfloat16_sub(bfloat16 a, bfloat16 b, float_status *status)
+{
+    FloatParts pa = bfloat16_unpack_canonical(a, status);
+    FloatParts pb = bfloat16_unpack_canonical(b, status);
+    FloatParts pr = addsub_floats(pa, pb, true, status);
+
+    return bfloat16_round_pack_canonical(pr, status);
+}
+
 /*
  * Returns the result of multiplying the floating-point values `a' and
  * `b'. The operation is performed according to the IEC/IEEE Standard
@@ -1260,6 +1306,20 @@ float64_mul(float64 a, float64 b, float_status *s)
                         f64_is_zon2, f64_addsubmul_post);
 }
 
+/*
+ * Returns the result of multiplying the bfloat16
+ * values `a' and `b'.
+ */
+
+bfloat16 QEMU_FLATTEN bfloat16_mul(bfloat16 a, bfloat16 b, float_status *status)
+{
+    FloatParts pa = bfloat16_unpack_canonical(a, status);
+    FloatParts pb = bfloat16_unpack_canonical(b, status);
+    FloatParts pr = mul_floats(pa, pb, status);
+
+    return bfloat16_round_pack_canonical(pr, status);
+}
+
 /*
  * Returns the result of multiplying the floating-point values `a' and
  * `b' then adding 'c', with no intermediate rounding step after the
@@ -1642,6 +1702,23 @@ float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
     return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
 }
 
+/*
+ * Returns the result of multiplying the bfloat16 values `a'
+ * and `b' then adding 'c', with no intermediate rounding step after the
+ * multiplication.
+ */
+
+bfloat16 QEMU_FLATTEN bfloat16_muladd(bfloat16 a, bfloat16 b, bfloat16 c,
+                                      int flags, float_status *status)
+{
+    FloatParts pa = bfloat16_unpack_canonical(a, status);
+    FloatParts pb = bfloat16_unpack_canonical(b, status);
+    FloatParts pc = bfloat16_unpack_canonical(c, status);
+    FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
+
+    return bfloat16_round_pack_canonical(pr, status);
+}
+
 /*
  * Returns the result of dividing the floating-point value `a' by the
  * corresponding value `b'. The operation is performed according to
@@ -1808,6 +1885,20 @@ float64_div(float64 a, float64 b, float_status *s)
                         f64_div_pre, f64_div_post);
 }
 
+/*
+ * Returns the result of dividing the bfloat16
+ * value `a' by the corresponding value `b'.
+ */
+
+bfloat16 bfloat16_div(bfloat16 a, bfloat16 b, float_status *status)
+{
+    FloatParts pa = bfloat16_unpack_canonical(a, status);
+    FloatParts pb = bfloat16_unpack_canonical(b, status);
+    FloatParts pr = div_floats(pa, pb, status);
+
+    return bfloat16_round_pack_canonical(pr, status);
+}
+
 /*
  * Float to Float conversions
  *
@@ -2847,6 +2938,25 @@ MINMAX(64, maxnummag, false, true, true)
 
 #undef MINMAX
 
+#define BF16_MINMAX(name, ismin, isiee, ismag)                          \
+bfloat16 bfloat16_ ## name(bfloat16 a, bfloat16 b, float_status *s)     \
+{                                                                       \
+    FloatParts pa = bfloat16_unpack_canonical(a, s);                    \
+    FloatParts pb = bfloat16_unpack_canonical(b, s);                    \
+    FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s);      \
+                                                                        \
+    return bfloat16_round_pack_canonical(pr, s);                        \
+}
+
+BF16_MINMAX(min, true, false, false)
+BF16_MINMAX(minnum, true, true, false)
+BF16_MINMAX(minnummag, true, true, true)
+BF16_MINMAX(max, false, false, false)
+BF16_MINMAX(maxnum, false, true, false)
+BF16_MINMAX(maxnummag, false, true, true)
+
+#undef BF16_MINMAX
+
 /* Floating point compare */
 static FloatRelation compare_floats(FloatParts a, FloatParts b, bool is_quiet,
                                     float_status *s)
@@ -3008,6 +3118,24 @@ FloatRelation float64_compare_quiet(float64 a, float64 b, float_status *s)
     return f64_compare(a, b, true, s);
 }
 
+static int QEMU_FLATTEN
+soft_bf16_compare(bfloat16 a, bfloat16 b, bool is_quiet, float_status *s)
+{
+    FloatParts pa = bfloat16_unpack_canonical(a, s);
+    FloatParts pb = bfloat16_unpack_canonical(b, s);
+    return compare_floats(pa, pb, is_quiet, s);
+}
+
+int bfloat16_compare(bfloat16 a, bfloat16 b, float_status *s)
+{
+    return soft_bf16_compare(a, b, false, s);
+}
+
+int bfloat16_compare_quiet(bfloat16 a, bfloat16 b, float_status *s)
+{
+    return soft_bf16_compare(a, b, true, s);
+}
+
 /* Multiply A by 2 raised to the power N.  */
 static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
 {
@@ -3047,6 +3175,13 @@ float64 float64_scalbn(float64 a, int n, float_status *status)
     return float64_round_pack_canonical(pr, status);
 }
 
+bfloat16 bfloat16_scalbn(bfloat16 a, int n, float_status *status)
+{
+    FloatParts pa = bfloat16_unpack_canonical(a, status);
+    FloatParts pr = scalbn_decomposed(pa, n, status);
+    return bfloat16_round_pack_canonical(pr, status);
+}
+
 /*
  * Square Root
  *
@@ -3197,6 +3332,13 @@ float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
     return soft_f64_sqrt(ua.s, s);
 }
 
+bfloat16 QEMU_FLATTEN bfloat16_sqrt(bfloat16 a, float_status *status)
+{
+    FloatParts pa = bfloat16_unpack_canonical(a, status);
+    FloatParts pr = sqrt_float(pa, status, &bfloat16_params);
+    return bfloat16_round_pack_canonical(pr, status);
+}
+
 /*----------------------------------------------------------------------------
 | The pattern for a default generated NaN.
 *----------------------------------------------------------------------------*/
@@ -3239,6 +3381,13 @@ float128 float128_default_nan(float_status *status)
     return r;
 }
 
+bfloat16 bfloat16_default_nan(float_status *status)
+{
+    FloatParts p = parts_default_nan(status);
+    p.frac >>= bfloat16_params.frac_shift;
+    return bfloat16_pack_raw(p);
+}
+
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN from a signalling NaN for the floating point value `a'.
 *----------------------------------------------------------------------------*/
@@ -3270,6 +3419,14 @@ float64 float64_silence_nan(float64 a, float_status *status)
     return float64_pack_raw(p);
 }
 
+bfloat16 bfloat16_silence_nan(bfloat16 a, float_status *status)
+{
+    FloatParts p = bfloat16_unpack_raw(a);
+    p.frac <<= bfloat16_params.frac_shift;
+    p = parts_silence_nan(p, status);
+    p.frac >>= bfloat16_params.frac_shift;
+    return bfloat16_pack_raw(p);
+}
 
 /*----------------------------------------------------------------------------
 | If `a' is denormal and we are in flush-to-zero mode then set the
@@ -3319,6 +3476,17 @@ float64 float64_squash_input_denormal(float64 a, float_status *status)
     return a;
 }
 
+bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status)
+{
+    if (status->flush_inputs_to_zero) {
+        FloatParts p = bfloat16_unpack_raw(a);
+        if (parts_squash_denormal(p, status)) {
+            return bfloat16_set_sign(bfloat16_zero, p.sign);
+        }
+    }
+    return a;
+}
+
 /*----------------------------------------------------------------------------
 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
 | and 7, and returns the properly rounded 32-bit integer corresponding to the
diff --git a/include/fpu/softfloat-types.h b/include/fpu/softfloat-types.h
index 7680193ebc..3a3bf55631 100644
--- a/include/fpu/softfloat-types.h
+++ b/include/fpu/softfloat-types.h
@@ -112,6 +112,11 @@ typedef struct {
 #define make_float128(high_, low_) ((float128) { .high = high_, .low = low_ })
 #define make_float128_init(high_, low_) { .high = high_, .low = low_ }
 
+/*
+ * Software neural-network floating-point types.
+ */
+typedef uint16_t bfloat16;
+
 /*
  * Software IEC/IEEE floating-point underflow tininess-detection mode.
  */
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 659218b5c7..198f954e4d 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -239,6 +239,37 @@ bool float16_is_quiet_nan(float16, float_status *status);
 bool float16_is_signaling_nan(float16, float_status *status);
 float16 float16_silence_nan(float16, float_status *status);
 
+/*----------------------------------------------------------------------------
+| Software bfloat16 operations.
+*----------------------------------------------------------------------------*/
+
+bfloat16 bfloat16_add(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_sub(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_mul(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_div(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_muladd(bfloat16, bfloat16, bfloat16, int,
+                         float_status *status);
+bfloat16 bfloat16_scalbn(bfloat16, int, float_status *status);
+bfloat16 bfloat16_min(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_max(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_minnum(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_maxnum(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_minnummag(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_maxnummag(bfloat16, bfloat16, float_status *status);
+bfloat16 bfloat16_sqrt(bfloat16, float_status *status);
+int bfloat16_compare(bfloat16, bfloat16, float_status *status);
+int bfloat16_compare_quiet(bfloat16, bfloat16, float_status *status);
+int bfloat16_unordered_quiet(bfloat16, bfloat16, float_status *status);
+int bfloat16_le(bfloat16, bfloat16, float_status *status);
+int bfloat16_lt(bfloat16, bfloat16, float_status *status);
+int bfloat16_eq_quiet(bfloat16, bfloat16, float_status *status);
+
+int bfloat16_is_quiet_nan(bfloat16, float_status *status);
+int bfloat16_is_signaling_nan(bfloat16, float_status *status);
+bfloat16 bfloat16_silence_nan(bfloat16, float_status *status);
+bfloat16 bfloat16_default_nan(float_status *status);
+bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
+
 static inline bool float16_is_any_nan(float16 a)
 {
     return ((float16_val(a) & ~0x8000) > 0x7c00);
@@ -293,6 +324,19 @@ static inline float16 float16_set_sign(float16 a, int sign)
 #define float16_three make_float16(0x4200)
 #define float16_infinity make_float16(0x7c00)
 
+static inline bfloat16 bfloat16_set_sign(bfloat16 a, int sign)
+{
+    return (a & 0x7fff) | (sign << 15);
+}
+
+#define bfloat16_zero 0
+#define bfloat16_half 0x3f00
+#define bfloat16_one 0x3f80
+#define bfloat16_one_point_five 0x3fc0
+#define bfloat16_two 0x4000
+#define bfloat16_three 0x4040
+#define bfloat16_infinity 0x7f80
+
 /*----------------------------------------------------------------------------
 | The pattern for a default generated half-precision NaN.
 *----------------------------------------------------------------------------*/
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] fpu/softfloat: Define convert operations for bfloat16
  2020-08-13  7:14 [PATCH 0/3] Implement blfoat16 in softfloat LIU Zhiwei
  2020-08-13  7:14 ` [PATCH 1/3] fpu/softfloat: Define operations for bfloat16 LIU Zhiwei
@ 2020-08-13  7:14 ` LIU Zhiwei
  2020-08-13  7:14 ` [PATCH 3/3] fpu/softfloat: Define misc " LIU Zhiwei
  2020-08-13 16:06 ` [PATCH 0/3] Implement blfoat16 in softfloat Richard Henderson
  3 siblings, 0 replies; 5+ messages in thread
From: LIU Zhiwei @ 2020-08-13  7:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: alex.bennee, richard.henderson, LIU Zhiwei, aurelien,
	peter.maydell

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 fpu/softfloat.c         | 223 ++++++++++++++++++++++++++++++++++++++++
 include/fpu/softfloat.h |  48 +++++++++
 2 files changed, 271 insertions(+)

diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index d4205f92d5..afb121135d 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -2014,6 +2014,34 @@ float32 float64_to_float32(float64 a, float_status *s)
     return float32_round_pack_canonical(pr, s);
 }
 
+float32 bfloat16_to_float32(bfloat16 a, float_status *s)
+{
+    FloatParts p = bfloat16_unpack_canonical(a, s);
+    FloatParts pr = float_to_float(p, &float32_params, s);
+    return float32_round_pack_canonical(pr, s);
+}
+
+float64 bfloat16_to_float64(bfloat16 a, float_status *s)
+{
+    FloatParts p = bfloat16_unpack_canonical(a, s);
+    FloatParts pr = float_to_float(p, &float64_params, s);
+    return float64_round_pack_canonical(pr, s);
+}
+
+bfloat16 float32_to_bfloat16(float32 a, float_status *s)
+{
+    FloatParts p = float32_unpack_canonical(a, s);
+    FloatParts pr = float_to_float(p, &bfloat16_params, s);
+    return bfloat16_round_pack_canonical(pr, s);
+}
+
+bfloat16 float64_to_bfloat16(float64 a, float_status *s)
+{
+    FloatParts p = float64_unpack_canonical(a, s);
+    FloatParts pr = float_to_float(p, &bfloat16_params, s);
+    return bfloat16_round_pack_canonical(pr, s);
+}
+
 /*
  * Rounds the floating-point value `a' to an integer, and returns the
  * result as a floating-point value. The operation is performed
@@ -2143,6 +2171,18 @@ float64 float64_round_to_int(float64 a, float_status *s)
     return float64_round_pack_canonical(pr, s);
 }
 
+/*
+ * Rounds the bfloat16 value `a' to an integer, and returns the
+ * result as a bfloat16 value.
+ */
+
+bfloat16 bfloat16_round_to_int(bfloat16 a, float_status *s)
+{
+    FloatParts pa = bfloat16_unpack_canonical(a, s);
+    FloatParts pr = round_to_int(pa, s->float_rounding_mode, 0, s);
+    return bfloat16_round_pack_canonical(pr, s);
+}
+
 /*
  * Returns the result of converting the floating-point value `a' to
  * the two's complement integer format. The conversion is performed
@@ -2353,6 +2393,62 @@ int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
     return float64_to_int64_scalbn(a, float_round_to_zero, 0, s);
 }
 
+/*
+ * Returns the result of converting the floating-point value `a' to
+ * the two's complement integer format.
+ */
+
+int16_t bfloat16_to_int16_scalbn(bfloat16 a, int rmode, int scale,
+                                 float_status *s)
+{
+    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
+                                 rmode, scale, INT16_MIN, INT16_MAX, s);
+}
+
+int32_t bfloat16_to_int32_scalbn(bfloat16 a, int rmode, int scale,
+                                 float_status *s)
+{
+    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
+                                 rmode, scale, INT32_MIN, INT32_MAX, s);
+}
+
+int64_t bfloat16_to_int64_scalbn(bfloat16 a, int rmode, int scale,
+                                 float_status *s)
+{
+    return round_to_int_and_pack(bfloat16_unpack_canonical(a, s),
+                                 rmode, scale, INT64_MIN, INT64_MAX, s);
+}
+
+int16_t bfloat16_to_int16(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_int16_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
+int32_t bfloat16_to_int32(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_int32_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
+int64_t bfloat16_to_int64(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_int64_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
+int16_t bfloat16_to_int16_round_to_zero(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_int16_scalbn(a, float_round_to_zero, 0, s);
+}
+
+int32_t bfloat16_to_int32_round_to_zero(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_int32_scalbn(a, float_round_to_zero, 0, s);
+}
+
+int64_t bfloat16_to_int64_round_to_zero(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_int64_scalbn(a, float_round_to_zero, 0, s);
+}
+
 /*
  *  Returns the result of converting the floating-point value `a' to
  *  the unsigned integer format. The conversion is performed according
@@ -2566,6 +2662,62 @@ uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
     return float64_to_uint64_scalbn(a, float_round_to_zero, 0, s);
 }
 
+/*
+ *  Returns the result of converting the bfloat16 value `a' to
+ *  the unsigned integer format.
+ */
+
+uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, int rmode, int scale,
+                                   float_status *s)
+{
+    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
+                                  rmode, scale, UINT16_MAX, s);
+}
+
+uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, int rmode, int scale,
+                                   float_status *s)
+{
+    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
+                                  rmode, scale, UINT32_MAX, s);
+}
+
+uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, int rmode, int scale,
+                                   float_status *s)
+{
+    return round_to_uint_and_pack(bfloat16_unpack_canonical(a, s),
+                                  rmode, scale, UINT64_MAX, s);
+}
+
+uint16_t bfloat16_to_uint16(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_uint16_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
+uint32_t bfloat16_to_uint32(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_uint32_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
+uint64_t bfloat16_to_uint64(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_uint64_scalbn(a, s->float_rounding_mode, 0, s);
+}
+
+uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_uint16_scalbn(a, float_round_to_zero, 0, s);
+}
+
+uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_uint32_scalbn(a, float_round_to_zero, 0, s);
+}
+
+uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *s)
+{
+    return bfloat16_to_uint64_scalbn(a, float_round_to_zero, 0, s);
+}
+
 /*
  * Integer to float conversions
  *
@@ -2692,6 +2844,41 @@ float64 int16_to_float64(int16_t a, float_status *status)
     return int64_to_float64_scalbn(a, 0, status);
 }
 
+/*
+ * Returns the result of converting the two's complement integer `a'
+ * to the bfloat16 format.
+ */
+
+bfloat16 int64_to_bfloat16_scalbn(int64_t a, int scale, float_status *status)
+{
+    FloatParts pa = int_to_float(a, scale, status);
+    return bfloat16_round_pack_canonical(pa, status);
+}
+
+bfloat16 int32_to_bfloat16_scalbn(int32_t a, int scale, float_status *status)
+{
+    return int64_to_bfloat16_scalbn(a, scale, status);
+}
+
+bfloat16 int16_to_bfloat16_scalbn(int16_t a, int scale, float_status *status)
+{
+    return int64_to_bfloat16_scalbn(a, scale, status);
+}
+
+bfloat16 int64_to_bfloat16(int64_t a, float_status *status)
+{
+    return int64_to_bfloat16_scalbn(a, 0, status);
+}
+
+bfloat16 int32_to_bfloat16(int32_t a, float_status *status)
+{
+    return int64_to_bfloat16_scalbn(a, 0, status);
+}
+
+bfloat16 int16_to_bfloat16(int16_t a, float_status *status)
+{
+    return int64_to_bfloat16_scalbn(a, 0, status);
+}
 
 /*
  * Unsigned Integer to float conversions
@@ -2817,6 +3004,42 @@ float64 uint16_to_float64(uint16_t a, float_status *status)
     return uint64_to_float64_scalbn(a, 0, status);
 }
 
+/*
+ * Returns the result of converting the unsigned integer `a' to the
+ * bfloat16 format.
+ */
+
+bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int scale, float_status *status)
+{
+    FloatParts pa = uint_to_float(a, scale, status);
+    return bfloat16_round_pack_canonical(pa, status);
+}
+
+bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int scale, float_status *status)
+{
+    return uint64_to_bfloat16_scalbn(a, scale, status);
+}
+
+bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int scale, float_status *status)
+{
+    return uint64_to_bfloat16_scalbn(a, scale, status);
+}
+
+bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status)
+{
+    return uint64_to_bfloat16_scalbn(a, 0, status);
+}
+
+bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status)
+{
+    return uint64_to_bfloat16_scalbn(a, 0, status);
+}
+
+bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status)
+{
+    return uint64_to_bfloat16_scalbn(a, 0, status);
+}
+
 /* Float Min/Max */
 /* min() and max() functions. These can't be implemented as
  * 'compare and pick one input' because that would mishandle
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 198f954e4d..4f72665b02 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -270,6 +270,54 @@ bfloat16 bfloat16_silence_nan(bfloat16, float_status *status);
 bfloat16 bfloat16_default_nan(float_status *status);
 bfloat16 bfloat16_squash_input_denormal(bfloat16 a, float_status *status);
 
+/*----------------------------------------------------------------------------
+| Software bfloat16 conversion routines.
+*----------------------------------------------------------------------------*/
+
+bfloat16 bfloat16_round_to_int(bfloat16, float_status *status);
+bfloat16 float32_to_bfloat16(float32, float_status *status);
+float32 bfloat16_to_float32(bfloat16, float_status *status);
+bfloat16 float64_to_bfloat16(float64 a, float_status *status);
+float64 bfloat16_to_float64(bfloat16 a, float_status *status);
+
+int16_t bfloat16_to_int16_scalbn(bfloat16, int, int, float_status *status);
+int32_t bfloat16_to_int32_scalbn(bfloat16, int, int, float_status *status);
+int64_t bfloat16_to_int64_scalbn(bfloat16, int, int, float_status *status);
+
+int16_t bfloat16_to_int16(bfloat16, float_status *status);
+int32_t bfloat16_to_int32(bfloat16, float_status *status);
+int64_t bfloat16_to_int64(bfloat16, float_status *status);
+
+int16_t bfloat16_to_int16_round_to_zero(bfloat16, float_status *status);
+int32_t bfloat16_to_int32_round_to_zero(bfloat16, float_status *status);
+int64_t bfloat16_to_int64_round_to_zero(bfloat16, float_status *status);
+
+uint16_t bfloat16_to_uint16_scalbn(bfloat16 a, int, int, float_status *status);
+uint32_t bfloat16_to_uint32_scalbn(bfloat16 a, int, int, float_status *status);
+uint64_t bfloat16_to_uint64_scalbn(bfloat16 a, int, int, float_status *status);
+
+uint16_t bfloat16_to_uint16(bfloat16 a, float_status *status);
+uint32_t bfloat16_to_uint32(bfloat16 a, float_status *status);
+uint64_t bfloat16_to_uint64(bfloat16 a, float_status *status);
+
+uint16_t bfloat16_to_uint16_round_to_zero(bfloat16 a, float_status *status);
+uint32_t bfloat16_to_uint32_round_to_zero(bfloat16 a, float_status *status);
+uint64_t bfloat16_to_uint64_round_to_zero(bfloat16 a, float_status *status);
+
+bfloat16 int16_to_bfloat16_scalbn(int16_t a, int, float_status *status);
+bfloat16 int32_to_bfloat16_scalbn(int32_t a, int, float_status *status);
+bfloat16 int64_to_bfloat16_scalbn(int64_t a, int, float_status *status);
+bfloat16 uint16_to_bfloat16_scalbn(uint16_t a, int, float_status *status);
+bfloat16 uint32_to_bfloat16_scalbn(uint32_t a, int, float_status *status);
+bfloat16 uint64_to_bfloat16_scalbn(uint64_t a, int, float_status *status);
+
+bfloat16 int16_to_bfloat16(int16_t a, float_status *status);
+bfloat16 int32_to_bfloat16(int32_t a, float_status *status);
+bfloat16 int64_to_bfloat16(int64_t a, float_status *status);
+bfloat16 uint16_to_bfloat16(uint16_t a, float_status *status);
+bfloat16 uint32_to_bfloat16(uint32_t a, float_status *status);
+bfloat16 uint64_to_bfloat16(uint64_t a, float_status *status);
+
 static inline bool float16_is_any_nan(float16 a)
 {
     return ((float16_val(a) & ~0x8000) > 0x7c00);
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] fpu/softfloat: Define misc operations for bfloat16
  2020-08-13  7:14 [PATCH 0/3] Implement blfoat16 in softfloat LIU Zhiwei
  2020-08-13  7:14 ` [PATCH 1/3] fpu/softfloat: Define operations for bfloat16 LIU Zhiwei
  2020-08-13  7:14 ` [PATCH 2/3] fpu/softfloat: Define convert " LIU Zhiwei
@ 2020-08-13  7:14 ` LIU Zhiwei
  2020-08-13 16:06 ` [PATCH 0/3] Implement blfoat16 in softfloat Richard Henderson
  3 siblings, 0 replies; 5+ messages in thread
From: LIU Zhiwei @ 2020-08-13  7:14 UTC (permalink / raw)
  To: qemu-devel
  Cc: alex.bennee, richard.henderson, LIU Zhiwei, aurelien,
	peter.maydell

Signed-off-by: LIU Zhiwei <zhiwei_liu@c-sky.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
 fpu/softfloat-specialize.inc.c | 38 +++++++++++++++++++++++++++++++
 include/fpu/softfloat.h        | 41 ++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

diff --git a/fpu/softfloat-specialize.inc.c b/fpu/softfloat-specialize.inc.c
index 034d18199c..1b6c4e47f8 100644
--- a/fpu/softfloat-specialize.inc.c
+++ b/fpu/softfloat-specialize.inc.c
@@ -259,6 +259,25 @@ bool float16_is_quiet_nan(float16 a_, float_status *status)
 #endif
 }
 
+/*----------------------------------------------------------------------------
+| Returns 1 if the bfloat16 value `a' is a quiet
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int bfloat16_is_quiet_nan(bfloat16 a_, float_status *status)
+{
+#ifdef NO_SIGNALING_NANS
+    return bfloat16_is_any_nan(a_);
+#else
+    uint16_t a = a_;
+    if (snan_bit_is_one(status)) {
+        return (((a >> 6) & 0x1FF) == 0x1FE) && (a & 0x3F);
+    } else {
+        return ((a >> 6) & 0x1FF) == 0x1FF;
+    }
+#endif
+}
+
 /*----------------------------------------------------------------------------
 | Returns 1 if the half-precision floating-point value `a' is a signaling
 | NaN; otherwise returns 0.
@@ -278,6 +297,25 @@ bool float16_is_signaling_nan(float16 a_, float_status *status)
 #endif
 }
 
+/*----------------------------------------------------------------------------
+| Returns 1 if the bfloat16 value `a' is a signaling
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+int bfloat16_is_signaling_nan(bfloat16 a_, float_status *status)
+{
+#ifdef NO_SIGNALING_NANS
+    return 0;
+#else
+    uint16_t a = a_;
+    if (snan_bit_is_one(status)) {
+        return ((a >> 6) & 0x1FF) == 0x1FF;
+    } else {
+        return (((a >> 6) & 0x1FF) == 0x1FE) && (a & 0x3F);
+    }
+#endif
+}
+
 /*----------------------------------------------------------------------------
 | Returns 1 if the single-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index 4f72665b02..d7ce3e3483 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -372,6 +372,47 @@ static inline float16 float16_set_sign(float16 a, int sign)
 #define float16_three make_float16(0x4200)
 #define float16_infinity make_float16(0x7c00)
 
+static inline int bfloat16_is_any_nan(bfloat16 a)
+{
+    return ((a & ~0x8000) > 0x7F80);
+}
+
+static inline int bfloat16_is_neg(bfloat16 a)
+{
+    return a >> 15;
+}
+
+static inline int bfloat16_is_infinity(bfloat16 a)
+{
+    return (a & 0x7fff) == 0x7F80;
+}
+
+static inline int bfloat16_is_zero(bfloat16 a)
+{
+    return (a & 0x7fff) == 0;
+}
+
+static inline int bfloat16_is_zero_or_denormal(bfloat16 a)
+{
+    return (a & 0x7F80) == 0;
+}
+
+static inline bfloat16 bfloat16_abs(bfloat16 a)
+{
+    /* Note that abs does *not* handle NaN specially, nor does
+     * it flush denormal inputs to zero.
+     */
+    return a & 0x7fff;
+}
+
+static inline bfloat16 bfloat16_chs(bfloat16 a)
+{
+    /* Note that chs does *not* handle NaN specially, nor does
+     * it flush denormal inputs to zero.
+     */
+    return a ^ 0x8000;
+}
+
 static inline bfloat16 bfloat16_set_sign(bfloat16 a, int sign)
 {
     return (a & 0x7fff) | (sign << 15);
-- 
2.23.0



^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 0/3] Implement blfoat16 in softfloat
  2020-08-13  7:14 [PATCH 0/3] Implement blfoat16 in softfloat LIU Zhiwei
                   ` (2 preceding siblings ...)
  2020-08-13  7:14 ` [PATCH 3/3] fpu/softfloat: Define misc " LIU Zhiwei
@ 2020-08-13 16:06 ` Richard Henderson
  3 siblings, 0 replies; 5+ messages in thread
From: Richard Henderson @ 2020-08-13 16:06 UTC (permalink / raw)
  To: LIU Zhiwei, qemu-devel; +Cc: peter.maydell, alex.bennee, aurelien

On 8/13/20 12:14 AM, LIU Zhiwei wrote:
> As bfloat16 is more and more popular in many archs, implement bfloat16
> interfaces in softfloat, so that archs can add their bfloat16 insns
> based on the blfoat16 interfaces here.
> 
> These interfaces have been tested by RISU on RISC-V bfloat16 with
> XuanTie C906.
> 
> LIU Zhiwei (3):
>   fpu/softfloat: Define operations for bfloat16
>   fpu/softfloat: Define convert operations for bfloat16
>   fpu/softfloat: Define misc operations for bfloat16
> 
>  fpu/softfloat-specialize.inc.c |  38 ++++
>  fpu/softfloat.c                | 391 +++++++++++++++++++++++++++++++++
>  include/fpu/softfloat-types.h  |   5 +
>  include/fpu/softfloat.h        | 133 +++++++++++
>  4 files changed, 567 insertions(+)
> 

Thanks.  Queued to softfloat-next.


r~


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2020-08-13 16:07 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-08-13  7:14 [PATCH 0/3] Implement blfoat16 in softfloat LIU Zhiwei
2020-08-13  7:14 ` [PATCH 1/3] fpu/softfloat: Define operations for bfloat16 LIU Zhiwei
2020-08-13  7:14 ` [PATCH 2/3] fpu/softfloat: Define convert " LIU Zhiwei
2020-08-13  7:14 ` [PATCH 3/3] fpu/softfloat: Define misc " LIU Zhiwei
2020-08-13 16:06 ` [PATCH 0/3] Implement blfoat16 in softfloat Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).