* [Qemu-devel] [PATCH v5 1/5] fpu: softfloat: Add normalize_roundpack_float32 function
2016-01-02 22:25 [Qemu-devel] [PATCH v5 0/5] target-tilegx: Implement floating point instructions chengang
@ 2016-01-02 22:25 ` chengang
2016-01-02 23:55 ` Chen Gang
2016-01-02 22:25 ` [Qemu-devel] [PATCH v5 2/5] target-tilegx/helper-fshared.h: Add floating point shared function chengang
` (3 subsequent siblings)
4 siblings, 1 reply; 7+ messages in thread
From: chengang @ 2016-01-02 22:25 UTC (permalink / raw)
To: peter.maydell, rth, cmetcalf; +Cc: qemu-devel, Chen Gang
From: Chen Gang <gang.chen.5i5j@gmail.com>
It is based on the (u)int32_to_float32() functions, to support float32 packing.
Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
---
fpu/softfloat.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++
include/fpu/softfloat.h | 8 +++++++
2 files changed, 63 insertions(+)
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index f1170fe..dba8566 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -7080,6 +7080,61 @@ float64 uint32_to_float64(uint32_t a, float_status *status)
return int64_to_float64(a, status);
}
+/*
+ * The mantissa contains the hidden bit, e.g. exp: 0x9e with sig: 1 means 1.0f.
+ *
+ * It is modelled on int32_to_float32() and uint32_to_float32().
+ */
+float32 normalize_roundpack_float32(flag sign, int_fast16_t exp, uint32_t sig,
+ float_status *status)
+{
+ uint64_t absa = sig;
+ int8_t scount;
+
+ if (exp >= 0xff) {
+ return packFloat32(sign, 0xFF, 0);
+ } else if (exp <= 0) {
+ shift32RightJamming(sig, 0 - exp, &sig);
+ return packFloat32(sign, 0, sig);
+ }
+
+ if (sign) {
+ if (sig & 0x7FFFFFFF) {
+ return normalizeRoundAndPackFloat32(1, exp - 2, sig, status);
+ }
+ if (sig) {
+ return packFloat32(1, exp, 0);
+ } else {
+ return float32_zero;
+ }
+ }
+
+ if (!sig) {
+ return float32_zero;
+ }
+
+ scount = countLeadingZeros64(absa) - 40;
+ if (scount >= 0) {
+ exp -= 7 + scount + 2;
+ if (exp <= 0) {
+ return packFloat32(0, 0, absa);
+ }
+ return packFloat32(0, exp, absa << scount);
+ }
+
+ scount += 7;
+ exp -= scount + 2;
+ if (exp <= 0) {
+ return packFloat32(0, 0, absa);
+ }
+ if (scount < 0) {
+ shift64RightJamming(absa, 0 - scount, &absa);
+ } else {
+ absa <<= scount;
+ }
+ return roundAndPackFloat32(0, exp, absa, status);
+}
+
uint32 float32_to_uint32(float32 a, float_status *status)
{
int64_t v;
diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
index ded34eb..4995a15 100644
--- a/include/fpu/softfloat.h
+++ b/include/fpu/softfloat.h
@@ -422,6 +422,14 @@ int float32_is_signaling_nan( float32 );
float32 float32_maybe_silence_nan( float32 );
float32 float32_scalbn(float32, int, float_status *status);
+/*
+ * The mantissa contains the hidden bit, e.g. exp: 0x9e with sig: 1 means 1.0f.
+ *
+ * It is modelled on int32_to_float32() and uint32_to_float32().
+ */
+float32 normalize_roundpack_float32(flag sign, int_fast16_t exp, uint32_t sig,
+ float_status *status);
+
static inline float32 float32_abs(float32 a)
{
/* Note that abs does *not* handle NaN specially, nor does
--
1.9.3
^ permalink raw reply related [flat|nested] 7+ messages in thread* Re: [Qemu-devel] [PATCH v5 1/5] fpu: softfloat: Add normalize_roundpack_float32 function
2016-01-02 22:25 ` [Qemu-devel] [PATCH v5 1/5] fpu: softfloat: Add normalize_roundpack_float32 function chengang
@ 2016-01-02 23:55 ` Chen Gang
0 siblings, 0 replies; 7+ messages in thread
From: Chen Gang @ 2016-01-02 23:55 UTC (permalink / raw)
To: chengang, peter.maydell, rth, cmetcalf; +Cc: qemu-devel
For the sig == 0 case, the original implementation is incorrect (although it
passes the gcc testsuite): it needs to take the sign into account when
returning float32_zero. The fix for it is in the diff below. Once the v5
patches have been reviewed, I shall merge this fix into patch v6.
Thanks.
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index dba8566..5ad8bb5 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -7098,19 +7098,15 @@ float32 normalize_roundpack_float32(flag sign, int_fast16_t exp, uint32_t sig,
return packFloat32(sign, 0, sig);
}
+ if (!sig) {
+ return float32_set_sign(float32_zero, sign);
+ }
+
if (sign) {
if (sig & 0x7FFFFFFF) {
return normalizeRoundAndPackFloat32(1, exp - 2, sig, status);
}
- if (sig) {
- return packFloat32(1, exp, 0);
- } else {
- return float32_zero;
- }
- }
-
- if (!sig) {
- return float32_zero;
+ return packFloat32(1, exp, 0);
}
scount = countLeadingZeros64(absa) - 40;
On 1/3/16 06:25, chengang@emindsoft.com.cn wrote:
> From: Chen Gang <gang.chen.5i5j@gmail.com>
>
> It is based on (u)int32_to_float32 function to support float32 packing.
>
> Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
> ---
> fpu/softfloat.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++
> include/fpu/softfloat.h | 8 +++++++
> 2 files changed, 63 insertions(+)
>
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index f1170fe..dba8566 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -7080,6 +7080,61 @@ float64 uint32_to_float64(uint32_t a, float_status *status)
> return int64_to_float64(a, status);
> }
>
> +/*
> + * The mantissa contents the hide bit, e.g. exp: 0x9e with sig: 1 means 1.0f.
> + *
> + * It references from int32_to_float32() and uint32_to_float32()
> + */
> +float32 normalize_roundpack_float32(flag sign, int_fast16_t exp, uint32_t sig,
> + float_status *status)
> +{
> + uint64_t absa = sig;
> + int8_t scount;
> +
> + if (exp >= 0xff) {
> + return packFloat32(sign, 0xFF, 0);
> + } else if (exp <= 0) {
> + shift32RightJamming(sig, 0 - exp, &sig);
> + return packFloat32(sign, 0, sig);
> + }
> +
> + if (sign) {
> + if (sig & 0x7FFFFFFF) {
> + return normalizeRoundAndPackFloat32(1, exp - 2, sig, status);
> + }
> + if (sig) {
> + return packFloat32(1, exp, 0);
> + } else {
> + return float32_zero;
> + }
> + }
> +
> + if (!sig) {
> + return float32_zero;
> + }
> +
> + scount = countLeadingZeros64(absa) - 40;
> + if (scount >= 0) {
> + exp -= 7 + scount + 2;
> + if (exp <= 0) {
> + return packFloat32(0, 0, absa);
> + }
> + return packFloat32(0, exp, absa << scount);
> + }
> +
> + scount += 7;
> + exp -= scount + 2;
> + if (exp <= 0) {
> + return packFloat32(0, 0, absa);
> + }
> + if (scount < 0) {
> + shift64RightJamming(absa, 0 - scount, &absa);
> + } else {
> + absa <<= scount;
> + }
> + return roundAndPackFloat32(0, exp, absa, status);
> +}
> +
> uint32 float32_to_uint32(float32 a, float_status *status)
> {
> int64_t v;
> diff --git a/include/fpu/softfloat.h b/include/fpu/softfloat.h
> index ded34eb..4995a15 100644
> --- a/include/fpu/softfloat.h
> +++ b/include/fpu/softfloat.h
> @@ -422,6 +422,14 @@ int float32_is_signaling_nan( float32 );
> float32 float32_maybe_silence_nan( float32 );
> float32 float32_scalbn(float32, int, float_status *status);
>
> +/*
> + * The mantissa contents the hide bit, e.g. exp: 0x9e with sig: 1 means 1.0f.
> + *
> + * It references from int32_to_float32() and uint32_to_float32()
> + */
> +float32 normalize_roundpack_float32(flag sign, int_fast16_t exp, uint32_t sig,
> + float_status *status);
> +
> static inline float32 float32_abs(float32 a)
> {
> /* Note that abs does *not* handle NaN specially, nor does
>
--
Chen Gang (陈刚)
Open, share, and attitude like air, water, and life which God blessed
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [Qemu-devel] [PATCH v5 2/5] target-tilegx/helper-fshared.h: Add floating point shared function
2016-01-02 22:25 [Qemu-devel] [PATCH v5 0/5] target-tilegx: Implement floating point instructions chengang
2016-01-02 22:25 ` [Qemu-devel] [PATCH v5 1/5] fpu: softfloat: Add normalize_roundpack_float32 function chengang
@ 2016-01-02 22:25 ` chengang
2016-01-02 22:25 ` [Qemu-devel] [PATCH v5 3/5] target-tilegx/helper-fsingle.c: Implement single floating point chengang
` (2 subsequent siblings)
4 siblings, 0 replies; 7+ messages in thread
From: chengang @ 2016-01-02 22:25 UTC (permalink / raw)
To: peter.maydell, rth, cmetcalf; +Cc: qemu-devel, Chen Gang
From: Chen Gang <gang.chen.5i5j@gmail.com>
It is used by fsingle and fdouble helpers.
Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
---
target-tilegx/helper-fshared.h | 56 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 56 insertions(+)
create mode 100644 target-tilegx/helper-fshared.h
diff --git a/target-tilegx/helper-fshared.h b/target-tilegx/helper-fshared.h
new file mode 100644
index 0000000..bcd673e
--- /dev/null
+++ b/target-tilegx/helper-fshared.h
@@ -0,0 +1,56 @@
+/*
+ * TILE-Gx virtual Floating point shared functions
+ *
+ * Copyright (c) 2015 Chen Gang
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#define DEC_INIT_FPSTATUS /* declares fp_status; the caller supplies ';' */ \
+    float_status fp_status = {.float_rounding_mode = float_round_nearest_even}
+
+static inline uint64_t create_fsfd_flag_un(void)
+{
+ return 1 << 25;
+}
+
+static inline uint64_t create_fsfd_flag_lt(void)
+{
+ return 1 << 26;
+}
+
+static inline uint64_t create_fsfd_flag_le(void)
+{
+ return 1 << 27;
+}
+
+static inline uint64_t create_fsfd_flag_gt(void)
+{
+ return 1 << 28;
+}
+
+static inline uint64_t create_fsfd_flag_ge(void)
+{
+ return 1 << 29;
+}
+
+static inline uint64_t create_fsfd_flag_eq(void)
+{
+ return 1 << 30;
+}
+
+static inline uint64_t create_fsfd_flag_ne(void)
+{
+ return 1ULL << 31;
+}
--
1.9.3
^ permalink raw reply related [flat|nested] 7+ messages in thread* [Qemu-devel] [PATCH v5 3/5] target-tilegx/helper-fsingle.c: Implement single floating point
2016-01-02 22:25 [Qemu-devel] [PATCH v5 0/5] target-tilegx: Implement floating point instructions chengang
2016-01-02 22:25 ` [Qemu-devel] [PATCH v5 1/5] fpu: softfloat: Add normalize_roundpack_float32 function chengang
2016-01-02 22:25 ` [Qemu-devel] [PATCH v5 2/5] target-tilegx/helper-fshared.h: Add floating point shared function chengang
@ 2016-01-02 22:25 ` chengang
2016-01-02 22:25 ` [Qemu-devel] [PATCH v5 4/5] target-tilegx/helper-fdouble.c: Implement double " chengang
2016-01-02 22:25 ` [Qemu-devel] [PATCH v5 5/5] target-tilegx: Integrate floating pointer implementation chengang
4 siblings, 0 replies; 7+ messages in thread
From: chengang @ 2016-01-02 22:25 UTC (permalink / raw)
To: peter.maydell, rth, cmetcalf; +Cc: qemu-devel, Chen Gang
From: Chen Gang <gang.chen.5i5j@gmail.com>
Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
---
target-tilegx/helper-fsingle.c | 200 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 200 insertions(+)
create mode 100644 target-tilegx/helper-fsingle.c
diff --git a/target-tilegx/helper-fsingle.c b/target-tilegx/helper-fsingle.c
new file mode 100644
index 0000000..997d40e
--- /dev/null
+++ b/target-tilegx/helper-fsingle.c
@@ -0,0 +1,200 @@
+/*
+ * QEMU TILE-Gx helpers
+ *
+ * Copyright (c) 2015 Chen Gang
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see
+ * <http://www.gnu.org/licenses/lgpl-2.1.html>
+ */
+
+#include "cpu.h"
+#include "qemu-common.h"
+#include "exec/helper-proto.h"
+#include "fpu/softfloat.h"
+
+#include "helper-fshared.h"
+
+#define IMPL_FULL
+
+/*
+ * FSingle instructions implementation:
+ *
+ * fsingle_add1 ; calc srca and srcb,
+ * ; convert float_32 to TileGXFPSFmt result.
+ * ; move TileGXFPSFmt result to dest.
+ *
+ * fsingle_sub1 ; calc srca and srcb.
+ * ; convert float_32 to TileGXFPSFmt result.
+ * ; move TileGXFPSFmt result to dest.
+ *
+ * fsingle_addsub2 ; nop.
+ *
+ * fsingle_mul1 ; calc srca and srcb.
+ * ; convert float_32 value to TileGXFPSFmt result.
+ * ; move TileGXFPSFmt result to dest.
+ *
+ * fsingle_mul2 ; move srca to dest.
+ *
+ * fsingle_pack1 ; nop
+ *
+ * fsingle_pack2 ; treat srca as TileGXFPSFmt result.
+ * ; convert TileGXFPSFmt result to float_32 value.
+ * ; move float_32 value to dest.
+ */
+
+static inline uint32_t get_fsingle_exp(uint64_t n)
+{
+ return n & 0xff;
+}
+
+static inline uint32_t get_fsingle_sign(uint64_t n)
+{
+ return test_bit(10, &n);
+}
+
+static inline unsigned int get_fsingle_man(uint64_t n)
+{
+ return n >> 32;
+}
+
+#ifdef IMPL_FULL
+
+static uint32_t get_f32_exp(float32 f)
+{
+ return extract32(float32_val(f), 23, 8);
+}
+
+static uint32_t get_f32_man(float32 f)
+{
+ return float32_val(f) & 0x7fffff;
+}
+
+static inline void set_fsingle_sign(uint64_t *n)
+{
+ set_bit(10, n);
+}
+
+static inline uint64_t create_fsingle_exp(float32 f)
+{
+ return get_f32_exp(f) & 0xff;
+}
+
+static inline uint64_t create_fsingle_man(float32 f)
+{
+ if (get_f32_exp(f)) {
+ return (uint64_t)get_f32_man(f) << 40 | (1ULL << 63);
+ }
+ return (uint64_t)get_f32_man(f) << 32;
+}
+
+static uint64_t float32_to_sfmt(float32 f)
+{
+ uint64_t sfmt = 0;
+
+ if (float32_is_neg(f)) {
+ set_fsingle_sign(&sfmt);
+ }
+ sfmt |= create_fsingle_exp(f);
+ sfmt |= create_fsingle_man(f);
+
+ return sfmt;
+}
+
+#else
+
+#define TILEGX_F_CALC_CVT 0 /* convert int to fsingle */
+#define TILEGX_F_CALC_NCVT 1 /* Not convertion */
+
+static inline unsigned int get_fsingle_calc(uint64_t n)
+{
+ return test_bit(11, &n);
+}
+
+static inline void set_fsingle_calc(uint64_t *n, uint32_t calc)
+{
+    *n = deposit64(*n, 11, 1, calc); /* bit 11 = calc; honour the argument */
+}
+
+static uint64_t float32_to_sfmt(float32 f)
+{
+ return (uint64_t)float32_val(f) << 32;
+}
+
+#endif
+
+uint64_t helper_fsingle_pack2(uint64_t srca)
+{
+ DEC_INIT_FPSTATUS;
+
+#ifndef IMPL_FULL
+ if (get_fsingle_calc(srca) == TILEGX_F_CALC_NCVT) {
+ return srca >> 32;
+ }
+#endif
+ return float32_val(normalize_roundpack_float32(get_fsingle_sign(srca),
+ get_fsingle_exp(srca),
+ get_fsingle_man(srca),
+ &fp_status));
+}
+
+static uint64_t main_calc(float32 fsrca, float32 fsrcb,
+ float32 (*calc)(float32, float32, float_status*))
+{
+ DEC_INIT_FPSTATUS;
+ uint64_t sfmt = float32_to_sfmt(calc(fsrca, fsrcb, &fp_status));
+
+ if (float32_eq(fsrca, fsrcb, &fp_status)) {
+ sfmt |= create_fsfd_flag_eq();
+ } else {
+ sfmt |= create_fsfd_flag_ne();
+ }
+
+ if (float32_lt(fsrca, fsrcb, &fp_status)) {
+ sfmt |= create_fsfd_flag_lt();
+ }
+ if (float32_le(fsrca, fsrcb, &fp_status)) {
+ sfmt |= create_fsfd_flag_le();
+ }
+
+ if (float32_lt(fsrcb, fsrca, &fp_status)) {
+ sfmt |= create_fsfd_flag_gt();
+ }
+ if (float32_le(fsrcb, fsrca, &fp_status)) {
+ sfmt |= create_fsfd_flag_ge();
+ }
+
+ if (float32_unordered(fsrca, fsrcb, &fp_status)) {
+ sfmt |= create_fsfd_flag_un();
+ }
+
+#ifndef IMPL_FULL
+ set_fsingle_calc(&sfmt, TILEGX_F_CALC_NCVT);
+#endif
+ return sfmt;
+}
+
+uint64_t helper_fsingle_add1(uint64_t srca, uint64_t srcb)
+{
+ return main_calc(make_float32(srca), make_float32(srcb), float32_add);
+}
+
+uint64_t helper_fsingle_sub1(uint64_t srca, uint64_t srcb)
+{
+ return main_calc(make_float32(srca), make_float32(srcb), float32_sub);
+}
+
+uint64_t helper_fsingle_mul1(uint64_t srca, uint64_t srcb)
+{
+ return main_calc(make_float32(srca), make_float32(srcb), float32_mul);
+}
--
1.9.3
^ permalink raw reply related [flat|nested] 7+ messages in thread* [Qemu-devel] [PATCH v5 4/5] target-tilegx/helper-fdouble.c: Implement double floating point
2016-01-02 22:25 [Qemu-devel] [PATCH v5 0/5] target-tilegx: Implement floating point instructions chengang
` (2 preceding siblings ...)
2016-01-02 22:25 ` [Qemu-devel] [PATCH v5 3/5] target-tilegx/helper-fsingle.c: Implement single floating point chengang
@ 2016-01-02 22:25 ` chengang
2016-01-02 22:25 ` [Qemu-devel] [PATCH v5 5/5] target-tilegx: Integrate floating pointer implementation chengang
4 siblings, 0 replies; 7+ messages in thread
From: chengang @ 2016-01-02 22:25 UTC (permalink / raw)
To: peter.maydell, rth, cmetcalf; +Cc: qemu-devel, Chen Gang
From: Chen Gang <gang.chen.5i5j@gmail.com>
Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
---
target-tilegx/helper-fdouble.c | 383 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 383 insertions(+)
create mode 100644 target-tilegx/helper-fdouble.c
diff --git a/target-tilegx/helper-fdouble.c b/target-tilegx/helper-fdouble.c
new file mode 100644
index 0000000..9aab3ac
--- /dev/null
+++ b/target-tilegx/helper-fdouble.c
@@ -0,0 +1,383 @@
+/*
+ * QEMU TILE-Gx helpers
+ *
+ * Copyright (c) 2015 Chen Gang
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see
+ * <http://www.gnu.org/licenses/lgpl-2.1.html>
+ */
+
+#include "cpu.h"
+#include "qemu-common.h"
+#include "exec/helper-proto.h"
+#include "fpu/softfloat.h"
+
+#include "helper-fshared.h"
+
+/*
+ * FDouble instructions implementation:
+ *
+ * fdouble_unpack_min ; srca and srcb are float_64 value.
+ * ; get the min absolute value's mantissa.
+ * ; move "mantissa >> (exp_max - exp_min)" to dest.
+ *
+ * fdouble_unpack_max ; srca and srcb are float_64 value.
+ * ; get the max absolute value's mantissa.
+ * ; move mantissa to dest.
+ *
+ * fdouble_add_flags ; srca and srcb are float_64 value.
+ * ; calc exp (exp_max), sign, and comp bits for flags.
+ * ; set addsub bit to flags and move flags to dest.
+ *
+ * fdouble_sub_flags ; srca and srcb are float_64 value.
+ * ; calc exp (exp_max), sign, and comp bits for flags.
+ * ; set addsub bit to flags and move flags to dest.
+ *
+ * fdouble_addsub: ; dest, srca (max, min mantissa), and srcb (flags).
+ * ; "dest +/- srca" depend on the add/sub bit of flags.
+ * ; move result mantissa to dest.
+ *
+ * fdouble_mul_flags: ; srca and srcb are float_64 value.
+ * ; calc sign (xor), exp (min + max), and comp bits.
+ * ; mix sign, exp, and comp bits as flags to dest.
+ *
+ * fdouble_pack1 ; move srcb (flags) to dest.
+ *
+ * fdouble_pack2 ; srca, srcb (high, low mantissa), and dest (flags)
+ * ; normalize and pack result from srca, srcb, and dest.
+ * ; move result to dest.
+ */
+
+#define TILEGX_F_EXP_DZERO 0x3ff /* Zero exp for double 11-bits */
+#define TILEGX_F_EXP_DMAX 0x7fe /* max exp for double 11-bits */
+#define TILEGX_F_EXP_DUF 0x1000/* underflow exp bit for double */
+
+#define TILEGX_F_MAN_HBIT (1ULL << 59)
+
+#define TILEGX_F_CALC_ADD 1 /* Perform absolute add operation */
+#define TILEGX_F_CALC_SUB 2 /* Perform absolute sub operation */
+#define TILEGX_F_CALC_MUL 3 /* Perform absolute mul operation */
+
+static uint32_t get_f64_exp(float64 d)
+{
+ return extract64(float64_val(d), 52, 11);
+}
+
+static void set_f64_exp(float64 *d, uint32_t exp)
+{
+ *d = make_float64(deposit64(float64_val(*d), 52, 11, exp));
+}
+
+static uint64_t get_f64_man(float64 d)
+{
+ return extract64(float64_val(d), 0, 52);
+}
+
+static uint64_t fr_to_man(float64 d)
+{
+ uint64_t val = get_f64_man(d) << 7;
+
+ if (get_f64_exp(d)) {
+ val |= TILEGX_F_MAN_HBIT; /* Restore HBIT for the next calculation */
+ }
+
+ return val;
+}
+
+static uint64_t get_fdouble_man(uint64_t n)
+{
+ return extract64(n, 0, 60);
+}
+
+static void set_fdouble_man(uint64_t *n, uint64_t man)
+{
+ *n = deposit64(*n, 0, 60, man);
+}
+
+static uint64_t get_fdouble_man_of(uint64_t n)
+{
+ return test_bit(60, &n);
+}
+
+static void clear_fdouble_man_of(uint64_t *n)
+{
+    clear_bit(60, n); /* bit 60 is the mantissa overflow flag */
+}
+
+static uint32_t get_fdouble_nan(uint64_t n)
+{
+ return test_bit(24, &n);
+}
+
+static void set_fdouble_nan(uint64_t *n)
+{
+ set_bit(24, n);
+}
+
+static uint32_t get_fdouble_inf(uint64_t n)
+{
+ return test_bit(23, &n);
+}
+
+static void set_fdouble_inf(uint64_t *n)
+{
+ set_bit(23, n);
+}
+
+static uint32_t get_fdouble_calc(uint64_t n)
+{
+ return extract32(n, 21, 2);
+}
+
+static void set_fdouble_calc(uint64_t *n, uint32_t calc)
+{
+ *n = deposit64(*n, 21, 2, calc);
+}
+
+static uint32_t get_fdouble_sign(uint64_t n)
+{
+ return test_bit(20, &n);
+}
+
+static void set_fdouble_sign(uint64_t *n)
+{
+ set_bit(20, n);
+}
+
+static uint32_t get_fdouble_vexp(uint64_t n)
+{
+ return extract32(n, 7, 13);
+}
+
+static void set_fdouble_vexp(uint64_t *n, uint32_t vexp)
+{
+ *n = deposit64(*n, 7, 13, vexp);
+}
+
+static uint64_t shift64RightJamming(uint64_t a, int_fast16_t count)
+{
+ if (count == 0) {
+ return a;
+ } else if (count < 64) {
+ return (a >> count) | ((a << ((0 - count) & 63)) != 0);
+ }
+ return (a != 0);
+}
+
+uint64_t helper_fdouble_unpack_min(uint64_t srca, uint64_t srcb)
+{
+ uint64_t v = 0;
+ uint32_t expa = get_f64_exp(srca);
+ uint32_t expb = get_f64_exp(srcb);
+
+ if (float64_is_any_nan(srca) || float64_is_any_nan(srcb)
+ || float64_is_infinity(srca) || float64_is_infinity(srcb)) {
+ return 0;
+ } else if (expa > expb) {
+ set_fdouble_man(&v, shift64RightJamming(fr_to_man(srcb), expa - expb));
+ } else if (expa < expb) {
+ set_fdouble_man(&v, shift64RightJamming(fr_to_man(srca), expb - expa));
+ } else if (get_f64_man(srca) > get_f64_man(srcb)) {
+ set_fdouble_man(&v, fr_to_man(srcb));
+ } else {
+ set_fdouble_man(&v, fr_to_man(srca));
+ }
+
+ return v;
+}
+
+uint64_t helper_fdouble_unpack_max(uint64_t srca, uint64_t srcb)
+{
+ uint64_t v = 0;
+ uint32_t expa = get_f64_exp(srca);
+ uint32_t expb = get_f64_exp(srcb);
+
+ if (float64_is_any_nan(srca) || float64_is_any_nan(srcb)
+ || float64_is_infinity(srca) || float64_is_infinity(srcb)) {
+ return 0;
+ } else if (expa > expb) {
+ set_fdouble_man(&v, fr_to_man(srca));
+ } else if (expa < expb) {
+ set_fdouble_man(&v, fr_to_man(srcb));
+ } else if (get_f64_man(srca) > get_f64_man(srcb)) {
+ set_fdouble_man(&v, fr_to_man(srca));
+ } else {
+ set_fdouble_man(&v, fr_to_man(srcb));
+ }
+
+ return v;
+}
+
+uint64_t helper_fdouble_addsub(uint64_t dest, uint64_t srca, uint64_t srcb)
+{
+ if (get_fdouble_calc(srcb) == TILEGX_F_CALC_ADD) {
+ return dest + srca; /* maybe set addsub overflow bit */
+ } else {
+ return dest - srca;
+ }
+}
+
+/* absolute-add/mul may cause add/mul carry or overflow */
+static bool proc_oflow(uint64_t *flags, uint64_t *v, uint64_t *srcb)
+{
+ if (get_fdouble_man_of(*v)) {
+ set_fdouble_vexp(flags, get_fdouble_vexp(*flags) + 1);
+ *srcb >>= 1;
+ *srcb |= *v << 63;
+ *v >>= 1;
+ clear_fdouble_man_of(v);
+ }
+ return get_fdouble_vexp(*flags) > TILEGX_F_EXP_DMAX;
+}
+
+uint64_t helper_fdouble_pack2(uint64_t flags, uint64_t srca, uint64_t srcb)
+{
+ DEC_INIT_FPSTATUS;
+ uint64_t v = srca;
+ float64 d = float64_set_sign(float64_zero, get_fdouble_sign(flags));
+
+ if (get_fdouble_nan(flags)) {
+ return float64_val(float64_default_nan);
+ } else if (get_fdouble_inf(flags)) {
+ return float64_val(d | float64_infinity);
+ }
+
+ /* absolute-mul needs left shift 4 + 1 bits to match the real mantissa */
+ if (get_fdouble_calc(flags) == TILEGX_F_CALC_MUL) {
+ v <<= 5;
+ v |= srcb >> 59;
+ srcb <<= 5;
+ }
+ v |= (srcb != 0);
+
+ /* must check underflow, firstly */
+ if (get_fdouble_vexp(flags) & TILEGX_F_EXP_DUF) {
+ return float64_val(d);
+ }
+
+ if (proc_oflow(&flags, &v, &srcb)) {
+ return float64_val(d | float64_infinity);
+ }
+
+ while (!(get_fdouble_man(v) & TILEGX_F_MAN_HBIT)
+ && (get_fdouble_man(v) | srcb)) {
+ set_fdouble_vexp(&flags, get_fdouble_vexp(flags) - 1);
+ set_fdouble_man(&v, get_fdouble_man(v) << 1);
+ set_fdouble_man(&v, get_fdouble_man(v) | (srcb >> 63));
+ srcb <<= 1;
+ }
+
+ /* check underflow, again, after format */
+ if ((get_fdouble_vexp(flags) & TILEGX_F_EXP_DUF) || !get_fdouble_man(v)) {
+ return float64_val(d);
+ }
+
+ if (get_fdouble_sign(flags)) {
+ d = int64_to_float64(0 - get_fdouble_man(v), &fp_status);
+ } else {
+ d = uint64_to_float64(get_fdouble_man(v), &fp_status);
+ }
+
+ if (get_f64_exp(d) == 59 + TILEGX_F_EXP_DZERO) {
+ set_f64_exp(&d, get_fdouble_vexp(flags));
+ } else { /* for carry and overflow again */
+ set_f64_exp(&d, get_fdouble_vexp(flags) + 1);
+ if (get_f64_exp(d) == TILEGX_F_EXP_DMAX) {
+ d = float64_infinity;
+ }
+ }
+
+ d = float64_set_sign(d, get_fdouble_sign(flags));
+
+ return float64_val(d);
+}
+
+static uint64_t main_calc(float64 fsrca, float64 fsrcb,
+ float64 (*calc)(float64, float64, float_status*))
+{
+ DEC_INIT_FPSTATUS;
+ float64 d;
+ uint64_t flags = 0;
+ uint32_t expa = get_f64_exp(fsrca);
+ uint32_t expb = get_f64_exp(fsrcb);
+
+ if (float64_eq(fsrca, fsrcb, &fp_status)) {
+ flags |= create_fsfd_flag_eq();
+ } else {
+ flags |= create_fsfd_flag_ne();
+ }
+
+ if (float64_lt(fsrca, fsrcb, &fp_status)) {
+ flags |= create_fsfd_flag_lt();
+ }
+ if (float64_le(fsrca, fsrcb, &fp_status)) {
+ flags |= create_fsfd_flag_le();
+ }
+
+ if (float64_lt(fsrcb, fsrca, &fp_status)) {
+ flags |= create_fsfd_flag_gt();
+ }
+ if (float64_le(fsrcb, fsrca, &fp_status)) {
+ flags |= create_fsfd_flag_ge();
+ }
+
+ if (float64_unordered(fsrca, fsrcb, &fp_status)) {
+ flags |= create_fsfd_flag_un();
+ }
+
+ d = calc(fsrca, fsrcb, &fp_status);
+ if (float64_is_neg(d)) {
+ set_fdouble_sign(&flags);
+ }
+
+ if (float64_is_any_nan(d)) {
+ set_fdouble_nan(&flags);
+ } else if (float64_is_infinity(d)) {
+ set_fdouble_inf(&flags);
+ } else if (calc == float64_add) {
+ set_fdouble_vexp(&flags, (expa > expb) ? expa : expb);
+ set_fdouble_calc(&flags,
+ (float64_is_neg(fsrca) == float64_is_neg(fsrcb))
+ ? TILEGX_F_CALC_ADD : TILEGX_F_CALC_SUB);
+
+ } else if (calc == float64_sub) {
+ set_fdouble_vexp(&flags, (expa > expb) ? expa : expb);
+ set_fdouble_calc(&flags,
+ (float64_is_neg(fsrca) != float64_is_neg(fsrcb))
+ ? TILEGX_F_CALC_ADD : TILEGX_F_CALC_SUB);
+
+ } else {
+ set_fdouble_vexp(&flags, (int64_t)(expa - TILEGX_F_EXP_DZERO)
+ + (int64_t)(expb - TILEGX_F_EXP_DZERO)
+ + TILEGX_F_EXP_DZERO);
+ set_fdouble_calc(&flags, TILEGX_F_CALC_MUL);
+ }
+
+ return flags;
+}
+
+uint64_t helper_fdouble_add_flags(uint64_t srca, uint64_t srcb)
+{
+ return main_calc(make_float64(srca), make_float64(srcb), float64_add);
+}
+
+uint64_t helper_fdouble_sub_flags(uint64_t srca, uint64_t srcb)
+{
+ return main_calc(make_float64(srca), make_float64(srcb), float64_sub);
+}
+
+uint64_t helper_fdouble_mul_flags(uint64_t srca, uint64_t srcb)
+{
+ return main_calc(make_float64(srca), make_float64(srcb), float64_mul);
+}
--
1.9.3
^ permalink raw reply related [flat|nested] 7+ messages in thread* [Qemu-devel] [PATCH v5 5/5] target-tilegx: Integrate floating pointer implementation
2016-01-02 22:25 [Qemu-devel] [PATCH v5 0/5] target-tilegx: Implement floating point instructions chengang
` (3 preceding siblings ...)
2016-01-02 22:25 ` [Qemu-devel] [PATCH v5 4/5] target-tilegx/helper-fdouble.c: Implement double " chengang
@ 2016-01-02 22:25 ` chengang
4 siblings, 0 replies; 7+ messages in thread
From: chengang @ 2016-01-02 22:25 UTC (permalink / raw)
To: peter.maydell, rth, cmetcalf; +Cc: qemu-devel, Chen Gang
From: Chen Gang <gang.chen.5i5j@gmail.com>
It builds cleanly and passes the gcc testsuite.
Signed-off-by: Chen Gang <gang.chen.5i5j@gmail.com>
---
target-tilegx/Makefile.objs | 3 ++-
target-tilegx/helper.h | 12 +++++++++
target-tilegx/translate.c | 66 ++++++++++++++++++++++++++++++++++++++-------
3 files changed, 71 insertions(+), 10 deletions(-)
diff --git a/target-tilegx/Makefile.objs b/target-tilegx/Makefile.objs
index 0db778f..136ad60 100644
--- a/target-tilegx/Makefile.objs
+++ b/target-tilegx/Makefile.objs
@@ -1 +1,2 @@
-obj-y += cpu.o translate.o helper.o simd_helper.o
+obj-y += cpu.o translate.o helper.o simd_helper.o \
+ helper-fsingle.o helper-fdouble.o
diff --git a/target-tilegx/helper.h b/target-tilegx/helper.h
index 9281d0f..3471fe3 100644
--- a/target-tilegx/helper.h
+++ b/target-tilegx/helper.h
@@ -24,3 +24,15 @@ DEF_HELPER_FLAGS_2(v1shrs, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(v2shl, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(v2shru, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_2(v2shrs, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
+DEF_HELPER_2(fsingle_add1, i64, i64, i64)
+DEF_HELPER_2(fsingle_sub1, i64, i64, i64)
+DEF_HELPER_2(fsingle_mul1, i64, i64, i64)
+DEF_HELPER_1(fsingle_pack2, i64, i64)
+DEF_HELPER_2(fdouble_unpack_min, i64, i64, i64)
+DEF_HELPER_2(fdouble_unpack_max, i64, i64, i64)
+DEF_HELPER_2(fdouble_add_flags, i64, i64, i64)
+DEF_HELPER_2(fdouble_sub_flags, i64, i64, i64)
+DEF_HELPER_3(fdouble_addsub, i64, i64, i64, i64)
+DEF_HELPER_2(fdouble_mul_flags, i64, i64, i64)
+DEF_HELPER_3(fdouble_pack2, i64, i64, i64, i64)
diff --git a/target-tilegx/translate.c b/target-tilegx/translate.c
index 354f25a..924eece 100644
--- a/target-tilegx/translate.c
+++ b/target-tilegx/translate.c
@@ -597,6 +597,11 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
}
qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s", mnemonic, reg_names[srca]);
return ret;
+
+ case OE_RR_X0(FSINGLE_PACK1):
+ case OE_RR_Y0(FSINGLE_PACK1):
+ mnemonic = "fsingle_pack1";
+ goto done2;
}
tdest = dest_gr(dc, dest);
@@ -613,9 +618,6 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
gen_helper_cnttz(tdest, tsrca);
mnemonic = "cnttz";
break;
- case OE_RR_X0(FSINGLE_PACK1):
- case OE_RR_Y0(FSINGLE_PACK1):
- return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
case OE_RR_X1(LD1S):
memop = MO_SB;
mnemonic = "ld1s"; /* prefetch_l1_fault */
@@ -734,6 +736,7 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
return TILEGX_EXCP_OPCODE_UNKNOWN;
}
+done2:
qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s", mnemonic,
reg_names[dest], reg_names[srca]);
return ret;
@@ -742,13 +745,21 @@ static TileExcp gen_rr_opcode(DisasContext *dc, unsigned opext,
static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
unsigned dest, unsigned srca, unsigned srcb)
{
- TCGv tdest = dest_gr(dc, dest);
- TCGv tsrca = load_gr(dc, srca);
- TCGv tsrcb = load_gr(dc, srcb);
+ TCGv tdest, tsrca, tsrcb;
TCGv t0;
const char *mnemonic;
switch (opext) {
+ case OE_RRR(FSINGLE_ADDSUB2, 0, X0):
+ mnemonic = "fsingle_addsub2";
+ goto done2;
+ }
+
+ tdest = dest_gr(dc, dest);
+ tsrca = load_gr(dc, srca);
+ tsrcb = load_gr(dc, srcb);
+
+ switch (opext) {
case OE_RRR(ADDXSC, 0, X0):
case OE_RRR(ADDXSC, 0, X1):
gen_saturate_op(tdest, tsrca, tsrcb, tcg_gen_add_tl);
@@ -906,14 +917,37 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
mnemonic = "exch";
break;
case OE_RRR(FDOUBLE_ADDSUB, 0, X0):
+ gen_helper_fdouble_addsub(tdest, load_gr(dc, dest), tsrca, tsrcb);
+ mnemonic = "fdouble_addsub";
+ break;
case OE_RRR(FDOUBLE_ADD_FLAGS, 0, X0):
+ gen_helper_fdouble_add_flags(tdest, tsrca, tsrcb);
+ mnemonic = "fdouble_add_flags";
+ break;
case OE_RRR(FDOUBLE_MUL_FLAGS, 0, X0):
+ gen_helper_fdouble_mul_flags(tdest, tsrca, tsrcb);
+ mnemonic = "fdouble_mul_flags";
+ break;
case OE_RRR(FDOUBLE_PACK1, 0, X0):
+ tcg_gen_mov_i64(tdest, tsrcb);
+ mnemonic = "fdouble_pack1";
+ break;
case OE_RRR(FDOUBLE_PACK2, 0, X0):
+ gen_helper_fdouble_pack2(tdest, load_gr(dc, dest), tsrca, tsrcb);
+ mnemonic = "fdouble_pack2";
+ break;
case OE_RRR(FDOUBLE_SUB_FLAGS, 0, X0):
+ gen_helper_fdouble_sub_flags(tdest, tsrca, tsrcb);
+ mnemonic = "fdouble_sub_flags";
+ break;
case OE_RRR(FDOUBLE_UNPACK_MAX, 0, X0):
+ gen_helper_fdouble_unpack_max(tdest, tsrca, tsrcb);
+ mnemonic = "fdouble_unpack_max";
+ break;
case OE_RRR(FDOUBLE_UNPACK_MIN, 0, X0):
- return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+ gen_helper_fdouble_unpack_min(tdest, tsrca, tsrcb);
+ mnemonic = "fdouble_unpack_min";
+ break;
case OE_RRR(FETCHADD4, 0, X1):
gen_atomic_excp(dc, dest, tdest, tsrca, tsrcb,
TILEGX_EXCP_OPCODE_FETCHADD4);
@@ -955,12 +989,25 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
mnemonic = "fetchor";
break;
case OE_RRR(FSINGLE_ADD1, 0, X0):
- case OE_RRR(FSINGLE_ADDSUB2, 0, X0):
+ gen_helper_fsingle_add1(tdest, tsrca, tsrcb);
+ mnemonic = "fsingle_add1";
+ break;
case OE_RRR(FSINGLE_MUL1, 0, X0):
+ gen_helper_fsingle_mul1(tdest, tsrca, tsrcb);
+ mnemonic = "fsingle_mul1";
+ break;
case OE_RRR(FSINGLE_MUL2, 0, X0):
+ tcg_gen_mov_i64(tdest, tsrca);
+ mnemonic = "fsingle_mul2";
+ break;
case OE_RRR(FSINGLE_PACK2, 0, X0):
+ gen_helper_fsingle_pack2(tdest, tsrca);
+ mnemonic = "fsingle_pack2";
+ break;
case OE_RRR(FSINGLE_SUB1, 0, X0):
- return TILEGX_EXCP_OPCODE_UNIMPLEMENTED;
+ gen_helper_fsingle_sub1(tdest, tsrca, tsrcb);
+ mnemonic = "fsingle_sub1";
+ break;
case OE_RRR(MNZ, 0, X0):
case OE_RRR(MNZ, 0, X1):
case OE_RRR(MNZ, 4, Y0):
@@ -1464,6 +1511,7 @@ static TileExcp gen_rrr_opcode(DisasContext *dc, unsigned opext,
return TILEGX_EXCP_OPCODE_UNKNOWN;
}
+done2:
qemu_log_mask(CPU_LOG_TB_IN_ASM, "%s %s, %s, %s", mnemonic,
reg_names[dest], reg_names[srca], reg_names[srcb]);
return TILEGX_EXCP_NONE;
--
1.9.3
^ permalink raw reply related [flat|nested] 7+ messages in thread