* [Qemu-devel] [PATCH 1/5] softfloat: move all default NaN definitions to softfloat.h.
2011-02-21 10:59 [Qemu-devel] [PATCH v5 0/5] ARM: fix Neon VRECPE and VRSQRTE instructions Christophe Lyon
@ 2011-02-21 10:59 ` Christophe Lyon
2011-02-21 11:55 ` Peter Maydell
2011-02-21 10:59 ` [Qemu-devel] [PATCH 2/5] softfloat: add _set_sign(), _infinity and _half for 32 and 64 bits floats Christophe Lyon
` (3 subsequent siblings)
4 siblings, 1 reply; 11+ messages in thread
From: Christophe Lyon @ 2011-02-21 10:59 UTC (permalink / raw)
To: qemu-devel
These special values are needed to implement some helper functions,
which return or use these values in some cases.
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
fpu/softfloat-specialize.h | 68 -------------------------------------------
fpu/softfloat.h | 69 ++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 69 insertions(+), 68 deletions(-)
diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
index 2d025bf..adc5ada 100644
--- a/fpu/softfloat-specialize.h
+++ b/fpu/softfloat-specialize.h
@@ -30,12 +30,6 @@ these four paragraphs for those parts of this code that are retained.
=============================================================================*/
-#if defined(TARGET_MIPS) || defined(TARGET_SH4)
-#define SNAN_BIT_IS_ONE 1
-#else
-#define SNAN_BIT_IS_ONE 0
-#endif
-
/*----------------------------------------------------------------------------
| Raises the exceptions specified by `flags'. Floating-point traps can be
| defined here if desired. It is currently not possible for such a trap
@@ -57,17 +51,6 @@ typedef struct {
} commonNaNT;
/*----------------------------------------------------------------------------
-| The pattern for a default generated half-precision NaN.
-*----------------------------------------------------------------------------*/
-#if defined(TARGET_ARM)
-#define float16_default_nan make_float16(0x7E00)
-#elif SNAN_BIT_IS_ONE
-#define float16_default_nan make_float16(0x7DFF)
-#else
-#define float16_default_nan make_float16(0xFE00)
-#endif
-
-/*----------------------------------------------------------------------------
| Returns 1 if the half-precision floating-point value `a' is a quiet
| NaN; otherwise returns 0.
*----------------------------------------------------------------------------*/
@@ -158,19 +141,6 @@ static float16 commonNaNToFloat16(commonNaNT a STATUS_PARAM)
}
/*----------------------------------------------------------------------------
-| The pattern for a default generated single-precision NaN.
-*----------------------------------------------------------------------------*/
-#if defined(TARGET_SPARC)
-#define float32_default_nan make_float32(0x7FFFFFFF)
-#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
-#define float32_default_nan make_float32(0x7FC00000)
-#elif SNAN_BIT_IS_ONE
-#define float32_default_nan make_float32(0x7FBFFFFF)
-#else
-#define float32_default_nan make_float32(0xFFC00000)
-#endif
-
-/*----------------------------------------------------------------------------
| Returns 1 if the single-precision floating-point value `a' is a quiet
| NaN; otherwise returns 0.
*----------------------------------------------------------------------------*/
@@ -413,19 +383,6 @@ static float32 propagateFloat32NaN( float32 a, float32 b STATUS_PARAM)
}
/*----------------------------------------------------------------------------
-| The pattern for a default generated double-precision NaN.
-*----------------------------------------------------------------------------*/
-#if defined(TARGET_SPARC)
-#define float64_default_nan make_float64(LIT64( 0x7FFFFFFFFFFFFFFF ))
-#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
-#define float64_default_nan make_float64(LIT64( 0x7FF8000000000000 ))
-#elif SNAN_BIT_IS_ONE
-#define float64_default_nan make_float64(LIT64( 0x7FF7FFFFFFFFFFFF ))
-#else
-#define float64_default_nan make_float64(LIT64( 0xFFF8000000000000 ))
-#endif
-
-/*----------------------------------------------------------------------------
| Returns 1 if the double-precision floating-point value `a' is a quiet
| NaN; otherwise returns 0.
*----------------------------------------------------------------------------*/
@@ -564,19 +521,6 @@ static float64 propagateFloat64NaN( float64 a, float64 b STATUS_PARAM)
#ifdef FLOATX80
/*----------------------------------------------------------------------------
-| The pattern for a default generated extended double-precision NaN. The
-| `high' and `low' values hold the most- and least-significant bits,
-| respectively.
-*----------------------------------------------------------------------------*/
-#if SNAN_BIT_IS_ONE
-#define floatx80_default_nan_high 0x7FFF
-#define floatx80_default_nan_low LIT64( 0xBFFFFFFFFFFFFFFF )
-#else
-#define floatx80_default_nan_high 0xFFFF
-#define floatx80_default_nan_low LIT64( 0xC000000000000000 )
-#endif
-
-/*----------------------------------------------------------------------------
| Returns 1 if the extended double-precision floating-point value `a' is a
| quiet NaN; otherwise returns 0. This slightly differs from the same
| function for other types as floatx80 has an explicit bit.
@@ -728,18 +672,6 @@ static floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b STATUS_PARAM)
#ifdef FLOAT128
/*----------------------------------------------------------------------------
-| The pattern for a default generated quadruple-precision NaN. The `high' and
-| `low' values hold the most- and least-significant bits, respectively.
-*----------------------------------------------------------------------------*/
-#if SNAN_BIT_IS_ONE
-#define float128_default_nan_high LIT64( 0x7FFF7FFFFFFFFFFF )
-#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
-#else
-#define float128_default_nan_high LIT64( 0xFFFF800000000000 )
-#define float128_default_nan_low LIT64( 0x0000000000000000 )
-#endif
-
-/*----------------------------------------------------------------------------
| Returns 1 if the quadruple-precision floating-point value `a' is a quiet
| NaN; otherwise returns 0.
*----------------------------------------------------------------------------*/
diff --git a/fpu/softfloat.h b/fpu/softfloat.h
index e57ee1e..f34a938 100644
--- a/fpu/softfloat.h
+++ b/fpu/softfloat.h
@@ -77,6 +77,12 @@ typedef int64_t sbits64;
#define LIT64( a ) a##LL
#define INLINE static inline
+#if defined(TARGET_MIPS) || defined(TARGET_SH4)
+#define SNAN_BIT_IS_ONE 1
+#else
+#define SNAN_BIT_IS_ONE 0
+#endif
+
/*----------------------------------------------------------------------------
| The macro `FLOATX80' must be defined to enable the extended double-precision
| floating-point format `floatx80'. If this macro is not defined, the
@@ -278,6 +284,17 @@ int float16_is_signaling_nan( float16 );
float16 float16_maybe_silence_nan( float16 );
/*----------------------------------------------------------------------------
+| The pattern for a default generated half-precision NaN.
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_ARM)
+#define float16_default_nan make_float16(0x7E00)
+#elif SNAN_BIT_IS_ONE
+#define float16_default_nan make_float16(0x7DFF)
+#else
+#define float16_default_nan make_float16(0xFE00)
+#endif
+
+/*----------------------------------------------------------------------------
| Software IEC/IEEE single-precision conversion routines.
*----------------------------------------------------------------------------*/
int float32_to_int16_round_to_zero( float32 STATUS_PARAM );
@@ -366,6 +383,20 @@ INLINE int float32_is_zero_or_denormal(float32 a)
#define float32_one make_float32(0x3f800000)
#define float32_ln2 make_float32(0x3f317218)
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_SPARC)
+#define float32_default_nan make_float32(0x7FFFFFFF)
+#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
+#define float32_default_nan make_float32(0x7FC00000)
+#elif SNAN_BIT_IS_ONE
+#define float32_default_nan make_float32(0x7FBFFFFF)
+#else
+#define float32_default_nan make_float32(0xFFC00000)
+#endif
+
/*----------------------------------------------------------------------------
| Software IEC/IEEE double-precision conversion routines.
*----------------------------------------------------------------------------*/
@@ -452,6 +483,19 @@ INLINE int float64_is_any_nan(float64 a)
#define float64_one make_float64(0x3ff0000000000000LL)
#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+#if defined(TARGET_SPARC)
+#define float64_default_nan make_float64(LIT64( 0x7FFFFFFFFFFFFFFF ))
+#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
+#define float64_default_nan make_float64(LIT64( 0x7FF8000000000000 ))
+#elif SNAN_BIT_IS_ONE
+#define float64_default_nan make_float64(LIT64( 0x7FF7FFFFFFFFFFFF ))
+#else
+#define float64_default_nan make_float64(LIT64( 0xFFF8000000000000 ))
+#endif
+
#ifdef FLOATX80
/*----------------------------------------------------------------------------
@@ -520,6 +564,19 @@ INLINE int floatx80_is_any_nan(floatx80 a)
return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1);
}
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN. The
+| `high' and `low' values hold the most- and least-significant bits,
+| respectively.
+*----------------------------------------------------------------------------*/
+#if SNAN_BIT_IS_ONE
+#define floatx80_default_nan_high 0x7FFF
+#define floatx80_default_nan_low LIT64( 0xBFFFFFFFFFFFFFFF )
+#else
+#define floatx80_default_nan_high 0xFFFF
+#define floatx80_default_nan_low LIT64( 0xC000000000000000 )
+#endif
+
#endif
#ifdef FLOAT128
@@ -593,6 +650,18 @@ INLINE int float128_is_any_nan(float128 a)
((a.low != 0) || ((a.high & 0xffffffffffffLL) != 0));
}
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN. The `high' and
+| `low' values hold the most- and least-significant bits, respectively.
+*----------------------------------------------------------------------------*/
+#if SNAN_BIT_IS_ONE
+#define float128_default_nan_high LIT64( 0x7FFF7FFFFFFFFFFF )
+#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
+#else
+#define float128_default_nan_high LIT64( 0xFFFF800000000000 )
+#define float128_default_nan_low LIT64( 0x0000000000000000 )
+#endif
+
#endif
#else /* CONFIG_SOFTFLOAT */
--
1.7.2.3
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH 2/5] softfloat: add _set_sign(), _infinity and _half for 32 and 64 bits floats.
2011-02-21 10:59 [Qemu-devel] [PATCH v5 0/5] ARM: fix Neon VRECPE and VRSQRTE instructions Christophe Lyon
2011-02-21 10:59 ` [Qemu-devel] [PATCH 1/5] softfloat: move all default NaN definitions to softfloat.h Christophe Lyon
@ 2011-02-21 10:59 ` Christophe Lyon
2011-02-21 11:57 ` Peter Maydell
2011-02-21 10:59 ` [Qemu-devel] [PATCH 3/5] target-arm: Introduce float64_256 and float64_512 constants Christophe Lyon
` (2 subsequent siblings)
4 siblings, 1 reply; 11+ messages in thread
From: Christophe Lyon @ 2011-02-21 10:59 UTC (permalink / raw)
To: qemu-devel
These constants and utility functions are needed to implement some
helpers. Defining constants avoids the need to re-compute them at
runtime.
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
fpu/softfloat.h | 15 +++++++++++++++
1 files changed, 15 insertions(+), 0 deletions(-)
diff --git a/fpu/softfloat.h b/fpu/softfloat.h
index f34a938..fd61dc4 100644
--- a/fpu/softfloat.h
+++ b/fpu/softfloat.h
@@ -379,9 +379,16 @@ INLINE int float32_is_zero_or_denormal(float32 a)
return (float32_val(a) & 0x7f800000) == 0;
}
+INLINE float32 float32_set_sign(float32 a, int sign)
+{
+ return make_float32((float32_val(a) & 0x7fffffff) | (sign << 31));
+}
+
#define float32_zero make_float32(0)
#define float32_one make_float32(0x3f800000)
#define float32_ln2 make_float32(0x3f317218)
+#define float32_half make_float32(0x3f000000)
+#define float32_infinity make_float32(0x7f800000)
/*----------------------------------------------------------------------------
@@ -479,9 +486,17 @@ INLINE int float64_is_any_nan(float64 a)
return ((float64_val(a) & ~(1ULL << 63)) > 0x7ff0000000000000ULL);
}
+INLINE float64 float64_set_sign(float64 a, int sign)
+{
+ return make_float64((float64_val(a) & 0x7fffffffffffffffULL)
+ | ((int64_t)sign << 63));
+}
+
#define float64_zero make_float64(0)
#define float64_one make_float64(0x3ff0000000000000LL)
#define float64_ln2 make_float64(0x3fe62e42fefa39efLL)
+#define float64_half make_float64(0x3fe0000000000000LL)
+#define float64_infinity make_float64(0x7ff0000000000000LL)
/*----------------------------------------------------------------------------
| The pattern for a default generated double-precision NaN.
--
1.7.2.3
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH 3/5] target-arm: Introduce float64_256 and float64_512 constants.
2011-02-21 10:59 [Qemu-devel] [PATCH v5 0/5] ARM: fix Neon VRECPE and VRSQRTE instructions Christophe Lyon
2011-02-21 10:59 ` [Qemu-devel] [PATCH 1/5] softfloat: move all default NaN definitions to softfloat.h Christophe Lyon
2011-02-21 10:59 ` [Qemu-devel] [PATCH 2/5] softfloat: add _set_sign(), _infinity and _half for 32 and 64 bits floats Christophe Lyon
@ 2011-02-21 10:59 ` Christophe Lyon
2011-02-21 11:59 ` Peter Maydell
2011-02-21 10:59 ` [Qemu-devel] [PATCH 4/5] target-arm: fix support for VRECPE Christophe Lyon
2011-02-21 10:59 ` [Qemu-devel] [PATCH 5/5] target-arm: fix support for VRSQRTE Christophe Lyon
4 siblings, 1 reply; 11+ messages in thread
From: Christophe Lyon @ 2011-02-21 10:59 UTC (permalink / raw)
To: qemu-devel
These two constants will be used by helper functions such as recpe_f32
and rsqrte_f32.
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
target-arm/helper.c | 5 +++++
1 files changed, 5 insertions(+), 0 deletions(-)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 7f63a28..30c1809 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2687,6 +2687,11 @@ float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUState *env)
/* NEON helpers. */
+/* Constants 256 and 512 are used in some helpers; we avoid relying on
+ * int->float conversions at run-time. */
+#define float64_256 make_float64(0x4070000000000000LL)
+#define float64_512 make_float64(0x4080000000000000LL)
+
/* TODO: The architecture specifies the value that the estimate functions
should return. We return the exact reciprocal/root instead. */
float32 HELPER(recpe_f32)(float32 a, CPUState *env)
--
1.7.2.3
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH 4/5] target-arm: fix support for VRECPE.
2011-02-21 10:59 [Qemu-devel] [PATCH v5 0/5] ARM: fix Neon VRECPE and VRSQRTE instructions Christophe Lyon
` (2 preceding siblings ...)
2011-02-21 10:59 ` [Qemu-devel] [PATCH 3/5] target-arm: Introduce float64_256 and float64_512 constants Christophe Lyon
@ 2011-02-21 10:59 ` Christophe Lyon
2011-02-21 11:53 ` Peter Maydell
2011-02-21 10:59 ` [Qemu-devel] [PATCH 5/5] target-arm: fix support for VRSQRTE Christophe Lyon
4 siblings, 1 reply; 11+ messages in thread
From: Christophe Lyon @ 2011-02-21 10:59 UTC (permalink / raw)
To: qemu-devel
Now use the same algorithm as described in the ARM ARM.
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
target-arm/helper.c | 83 +++++++++++++++++++++++++++++++++++++++++++-------
1 files changed, 71 insertions(+), 12 deletions(-)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 30c1809..7445def 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2692,13 +2692,67 @@ float32 HELPER(rsqrts_f32)(float32 a, float32 b, CPUState *env)
#define float64_256 make_float64(0x4070000000000000LL)
#define float64_512 make_float64(0x4080000000000000LL)
-/* TODO: The architecture specifies the value that the estimate functions
- should return. We return the exact reciprocal/root instead. */
+/* The algorithm that must be used to calculate the estimate
+ * is specified by the ARM ARM.
+ */
+static float64 recip_estimate(float64 a, CPUState *env)
+{
+ float_status *s = &env->vfp.standard_fp_status;
+ /* q = (int)(a * 512.0) */
+ float64 q = float64_mul(float64_512, a, s);
+ int64_t q_int = float64_to_int64_round_to_zero(q, s);
+
+ /* r = 1.0 / (((double)q + 0.5) / 512.0) */
+ q = int64_to_float64(q_int, s);
+ q = float64_add(q, float64_half, s);
+ q = float64_div(q, float64_512, s);
+ q = float64_div(float64_one, q, s);
+
+ /* s = (int)(256.0 * r + 0.5) */
+ q = float64_mul(q, float64_256, s);
+ q = float64_add(q, float64_half, s);
+ q_int = float64_to_int64_round_to_zero(q, s);
+
+ /* return (double)s / 256.0 */
+ return float64_div(int64_to_float64(q_int, s), float64_256, s);
+}
+
float32 HELPER(recpe_f32)(float32 a, CPUState *env)
{
- float_status *s = &env->vfp.fp_status;
- float32 one = int32_to_float32(1, s);
- return float32_div(one, a, s);
+ float_status *s = &env->vfp.standard_fp_status;
+ float64 f64;
+ uint32_t val32 = float32_val(a);
+
+ int result_exp;
+ int a_exp = (val32 & 0x7f800000) >> 23;
+ int sign = val32 & 0x80000000;
+
+ if (float32_is_any_nan(a)) {
+ if (float32_is_signaling_nan(a)) {
+ float_raise(float_flag_invalid, s);
+ }
+ return float32_maybe_silence_nan(a);
+ } else if (float32_is_infinity(a)) {
+ return float32_set_sign(float32_zero, float32_is_neg(a));
+ } else if (float32_is_zero_or_denormal(a)) {
+ float_raise(float_flag_divbyzero, s);
+ return float32_set_sign(float32_infinity, float32_is_neg(a));
+ } else if (a_exp >= 253) {
+ float_raise(float_flag_underflow, s);
+ return float32_set_sign(float32_zero, float32_is_neg(a));
+ }
+
+ f64 = make_float64((0x3feULL << 52)
+ | ((int64_t)(val32 & 0x7fffff) << 29));
+
+ result_exp = 253 - a_exp;
+
+ f64 = recip_estimate(f64, env);
+
+ val32 = sign
+ | ((result_exp & 0xff) << 23)
+ | ((float64_val(f64) >> 29) & 0x7fffff);
+ return make_float32(val32);
}
float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
@@ -2710,13 +2764,18 @@ float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
uint32_t HELPER(recpe_u32)(uint32_t a, CPUState *env)
{
- float_status *s = &env->vfp.fp_status;
- float32 tmp;
- tmp = int32_to_float32(a, s);
- tmp = float32_scalbn(tmp, -32, s);
- tmp = helper_recpe_f32(tmp, env);
- tmp = float32_scalbn(tmp, 31, s);
- return float32_to_int32(tmp, s);
+ float64 f64;
+
+ if ((a & 0x80000000) == 0) {
+ return 0xffffffff;
+ }
+
+ f64 = make_float64((0x3feULL << 52)
+ | ((int64_t)(a & 0x7fffffff) << 21));
+
+ f64 = recip_estimate (f64, env);
+
+ return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
}
uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUState *env)
--
1.7.2.3
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [Qemu-devel] [PATCH 4/5] target-arm: fix support for VRECPE.
2011-02-21 10:59 ` [Qemu-devel] [PATCH 4/5] target-arm: fix support for VRECPE Christophe Lyon
@ 2011-02-21 11:53 ` Peter Maydell
0 siblings, 0 replies; 11+ messages in thread
From: Peter Maydell @ 2011-02-21 11:53 UTC (permalink / raw)
To: Christophe Lyon; +Cc: qemu-devel
On 21 February 2011 10:59, Christophe Lyon <christophe.lyon@st.com> wrote:
> + if (float32_is_any_nan(a)) {
> + if (float32_is_signaling_nan(a)) {
> + float_raise(float_flag_invalid, s);
> + }
> + return float32_maybe_silence_nan(a);
This returns the wrong answer for NaNs: this is a
Neon instruction so you want to return the default NaN.
Just 'return float32_default_nan;' instead of
calling float32_maybe_silence_nan().
(What's actually missing in softfloat is a single-value
equivalent of propagateFloat*NaN() but since we don't
need to vary based on default_nan_mode we can just
ignore that for now.)
Otherwise OK, I think.
PS: if you have a suitable board available for
use as a reference you could run risu on your
patches and avoid them being bounced back for
corner case failures :-)
-- PMM
^ permalink raw reply [flat|nested] 11+ messages in thread
* [Qemu-devel] [PATCH 5/5] target-arm: fix support for VRSQRTE.
2011-02-21 10:59 [Qemu-devel] [PATCH v5 0/5] ARM: fix Neon VRECPE and VRSQRTE instructions Christophe Lyon
` (3 preceding siblings ...)
2011-02-21 10:59 ` [Qemu-devel] [PATCH 4/5] target-arm: fix support for VRECPE Christophe Lyon
@ 2011-02-21 10:59 ` Christophe Lyon
2011-02-21 12:04 ` Peter Maydell
4 siblings, 1 reply; 11+ messages in thread
From: Christophe Lyon @ 2011-02-21 10:59 UTC (permalink / raw)
To: qemu-devel
Now use the same algorithm as described in the ARM ARM.
Signed-off-by: Christophe Lyon <christophe.lyon@st.com>
---
target-arm/helper.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 111 insertions(+), 10 deletions(-)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 7445def..24d02a4 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -2755,11 +2755,104 @@ float32 HELPER(recpe_f32)(float32 a, CPUState *env)
return make_float32(val32);
}
+/* The algorithm that must be used to calculate the estimate
+ * is specified by the ARM ARM.
+ */
+static float64 recip_sqrt_estimate(float64 a, CPUState *env)
+{
+ float_status *s = &env->vfp.standard_fp_status;
+ float64 q;
+ int64_t q_int;
+
+ if (float64_lt(a, float64_half, s)) {
+ /* range 0.25 <= a < 0.5 */
+
+ /* a in units of 1/512 rounded down */
+ /* q0 = (int)(a * 512.0); */
+ q = float64_mul(float64_512, a, s);
+ q_int = float64_to_int64_round_to_zero(q, s);
+
+ /* reciprocal root r */
+ /* r = 1.0 / sqrt(((double)q0 + 0.5) / 512.0); */
+ q = int64_to_float64(q_int, s);
+ q = float64_add(q, float64_half, s);
+ q = float64_div(q, float64_512, s);
+ q = float64_sqrt(q, s);
+ q = float64_div(float64_one, q, s);
+ } else {
+ /* range 0.5 <= a < 1.0 */
+
+ /* a in units of 1/256 rounded down */
+ /* q1 = (int)(a * 256.0); */
+ q = float64_mul(float64_256, a, s);
+ int64_t q_int = float64_to_int64_round_to_zero(q, s);
+
+ /* reciprocal root r */
+ /* r = 1.0 /sqrt(((double)q1 + 0.5) / 256); */
+ q = int64_to_float64(q_int, s);
+ q = float64_add(q, float64_half, s);
+ q = float64_div(q, float64_256, s);
+ q = float64_sqrt(q, s);
+ q = float64_div(float64_one, q, s);
+ }
+ /* r in units of 1/256 rounded to nearest */
+ /* s = (int)(256.0 * r + 0.5); */
+
+ q = float64_mul(q, float64_256,s );
+ q = float64_add(q, float64_half, s);
+ q_int = float64_to_int64_round_to_zero(q, s);
+
+ /* return (double)s / 256.0;*/
+ return float64_div(int64_to_float64(q_int, s), float64_256, s);
+}
+
float32 HELPER(rsqrte_f32)(float32 a, CPUState *env)
{
- float_status *s = &env->vfp.fp_status;
- float32 one = int32_to_float32(1, s);
- return float32_div(one, float32_sqrt(a, s), s);
+ float_status *s = &env->vfp.standard_fp_status;
+ int result_exp;
+ float64 f64;
+ uint32_t val;
+ uint64_t val64;
+
+ val = float32_val(a);
+
+ if (float32_is_any_nan(a)) {
+ if (float32_is_signaling_nan(a)) {
+ float_raise(float_flag_invalid, s);
+ }
+ return float32_maybe_silence_nan(a);
+ } else if (float32_is_zero_or_denormal(a)) {
+ float_raise(float_flag_divbyzero, s);
+ return float32_set_sign(float32_infinity, float32_is_neg(a));
+ } else if (float32_is_neg(a)) {
+ float_raise(float_flag_invalid, s);
+ return float32_default_nan;
+ } else if (float32_is_infinity(a)) {
+ return float32_zero;
+ }
+
+ /* Normalize to a double-precision value between 0.25 and 1.0,
+ * preserving the parity of the exponent. */
+ if ((val & 0x800000) == 0) {
+ f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+ | (0x3feULL << 52)
+ | ((uint64_t)(val & 0x7fffff) << 29));
+ } else {
+ f64 = make_float64(((uint64_t)(val & 0x80000000) << 32)
+ | (0x3fdULL << 52)
+ | ((uint64_t)(val & 0x7fffff) << 29));
+ }
+
+ result_exp = (380 - ((val & 0x7f800000) >> 23)) / 2;
+
+ f64 = recip_sqrt_estimate(f64, env);
+
+ val64 = float64_val(f64);
+
+ val = ((val64 >> 63) & 0x80000000)
+ | ((result_exp & 0xff) << 23)
+ | ((val64 >> 29) & 0x7fffff);
+ return make_float32(val);
}
uint32_t HELPER(recpe_u32)(uint32_t a, CPUState *env)
@@ -2780,13 +2873,21 @@ uint32_t HELPER(recpe_u32)(uint32_t a, CPUState *env)
uint32_t HELPER(rsqrte_u32)(uint32_t a, CPUState *env)
{
- float_status *s = &env->vfp.fp_status;
- float32 tmp;
- tmp = int32_to_float32(a, s);
- tmp = float32_scalbn(tmp, -32, s);
- tmp = helper_rsqrte_f32(tmp, env);
- tmp = float32_scalbn(tmp, 31, s);
- return float32_to_int32(tmp, s);
+ float64 f64;
+
+ if ((a & 0xc0000000) == 0) return 0xffffffff;
+
+ if (a & 0x80000000) {
+ f64 = make_float64((0x3feULL << 52)
+ | ((uint64_t)(a & 0x7fffffff) << 21));
+ } else { /* bits 31-30 == '01' */
+ f64 = make_float64((0x3fdULL << 52)
+ | ((uint64_t)(a & 0x3fffffff) << 22));
+ }
+
+ f64 = recip_sqrt_estimate(f64, env);
+
+ return 0x80000000 | ((float64_val(f64) >> 21) & 0x7fffffff);
}
void HELPER(set_teecr)(CPUState *env, uint32_t val)
--
1.7.2.3
^ permalink raw reply related [flat|nested] 11+ messages in thread