From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:54576) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1f3rZh-0004QT-Cp for qemu-devel@nongnu.org; Wed, 04 Apr 2018 19:12:18 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1f3rZg-000489-7F for qemu-devel@nongnu.org; Wed, 04 Apr 2018 19:12:17 -0400 Received: from out5-smtp.messagingengine.com ([66.111.4.29]:33229) by eggs.gnu.org with esmtps (TLS1.0:DHE_RSA_AES_256_CBC_SHA1:32) (Exim 4.71) (envelope-from ) id 1f3rZg-00047f-3F for qemu-devel@nongnu.org; Wed, 04 Apr 2018 19:12:16 -0400 From: "Emilio G. Cota" Date: Wed, 4 Apr 2018 19:11:12 -0400 Message-Id: <1522883475-27858-13-git-send-email-cota@braap.org> In-Reply-To: <1522883475-27858-1-git-send-email-cota@braap.org> References: <1522883475-27858-1-git-send-email-cota@braap.org> Subject: [Qemu-devel] [PATCH v3 12/15] hardfloat: support float32/64 division List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: qemu-devel@nongnu.org Cc: Aurelien Jarno , Peter Maydell , =?UTF-8?q?Alex=20Benn=C3=A9e?= , Laurent Vivier , Richard Henderson , Paolo Bonzini , Mark Cave-Ayland Performance results for fp-bench: 1. Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz - before: div-single: 34.84 MFlops div-double: 34.04 MFlops - after: div-single: 275.23 MFlops div-double: 216.38 MFlops 2. ARM Aarch64 A57 @ 2.4GHz - before: div-single: 9.33 MFlops div-double: 9.30 MFlops - after: div-single: 51.55 MFlops div-double: 15.09 MFlops 3. IBM POWER8E @ 2.1 GHz - before: div-single: 25.65 MFlops div-double: 24.91 MFlops - after: div-single: 96.83 MFlops div-double: 31.01 MFlops Here setting QEMU_HARDFLOAT_2F64_USE_FP to 1 pays off for x86_64: [1] 215.97 vs [0] 62.15 MFlops Signed-off-by: Emilio G. 
Cota --- fpu/softfloat.c | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/fpu/softfloat.c b/fpu/softfloat.c index 2c68b9d..4323dc2 100644 --- a/fpu/softfloat.c +++ b/fpu/softfloat.c @@ -1666,7 +1666,8 @@ float16 float16_div(float16 a, float16 b, float_status *status) return float16_round_pack_canonical(pr, status); } -float32 float32_div(float32 a, float32 b, float_status *status) +/* Formerly the public float32_div(); now static and passed to float_gen2()/f32_gen2() by the new float32_div() added in this patch. */ static float32 QEMU_SOFTFLOAT_ATTR +soft_float32_div(float32 a, float32 b, float_status *status) { FloatParts pa = float32_unpack_canonical(a, status); FloatParts pb = float32_unpack_canonical(b, status); @@ -1675,7 +1676,8 @@ float32 float32_div(float32 a, float32 b, float_status *status) return float32_round_pack_canonical(pr, status); } -float64 float64_div(float64 a, float64 b, float_status *status) +/* Formerly the public float64_div(); now static and passed to double_gen2()/f64_gen2() by the new float64_div() added in this patch. */ static float64 QEMU_SOFTFLOAT_ATTR +soft_float64_div(float64 a, float64 b, float_status *status) { FloatParts pa = float64_unpack_canonical(a, status); FloatParts pb = float64_unpack_canonical(b, status); @@ -1684,6 +1686,88 @@ float64 float64_div(float64 a, float64 b, float_status *status) return float64_round_pack_canonical(pr, status); } +/* Returns a / b using plain C float division. */ static float float_div(float a, float b) +{ + return a / b; +} + +/* Returns a / b using plain C double division. */ static double double_div(double a, double b) +{ + return a / b; +} + +/* Pre-check on float32 operands: true only when float32_is_zero_or_normal(a), float32_is_normal(b) and can_use_fpu(s) all hold; the whole condition is wrapped in likely(). */ static bool f32_div_pre(float32 a, float32 b, const struct float_status *s) +{ + return likely(float32_is_zero_or_normal(a) && + float32_is_normal(b) && + can_use_fpu(s)); +} + +/* Pre-check on float64 operands: true only when float64_is_zero_or_normal(a), float64_is_normal(b) and can_use_fpu(s) all hold; the whole condition is wrapped in likely(). */ static bool f64_div_pre(float64 a, float64 b, const struct float_status *s) +{ + return likely(float64_is_zero_or_normal(a) && + float64_is_normal(b) && + can_use_fpu(s)); +} + +/* Pre-check on C float operands: true only when a classifies as FP_NORMAL or FP_ZERO, b classifies as FP_NORMAL, and can_use_fpu(s) holds. */ static bool float_div_pre(float a, float b, const struct float_status *s) +{ + return likely((fpclassify(a) == FP_NORMAL || fpclassify(a) == FP_ZERO) && + fpclassify(b) == FP_NORMAL && + can_use_fpu(s)); +} + +/* Pre-check on C double operands: true only when a classifies as FP_NORMAL or FP_ZERO, b classifies as FP_NORMAL, and can_use_fpu(s) holds. */ static bool double_div_pre(double a, double b, const struct float_status *s) +{ + 
return likely((fpclassify(a) == FP_NORMAL || fpclassify(a) == FP_ZERO) && + fpclassify(b) == FP_NORMAL && + can_use_fpu(s)); +} + +/* Post-check on float32 operands: true unless float32_is_zero(a); b and s are not read. NOTE(review): presumably consulted by f32_gen2() after the hard operation; confirm against that helper. */ static bool f32_div_post(float32 a, float32 b, const struct float_status *s) +{ + return !float32_is_zero(a); +} + +/* Post-check on float64 operands: true unless float64_is_zero(a); b and s are not read. NOTE(review): presumably consulted by f64_gen2() after the hard operation; confirm against that helper. */ static bool f64_div_post(float64 a, float64 b, const struct float_status *s) +{ + return !float64_is_zero(a); +} + +/* Post-check on C float operands: true unless a classifies as FP_ZERO; b and s are not read. */ static bool float_div_post(float a, float b, const struct float_status *s) +{ + return fpclassify(a) != FP_ZERO; +} + +/* Post-check on C double operands: true unless a classifies as FP_ZERO; b and s are not read. */ static bool double_div_post(double a, double b, const struct float_status *s) +{ + return fpclassify(a) != FP_ZERO; +} + +/* Public float32 division: calls float_gen2() when QEMU_HARDFLOAT_2F32_USE_FP is non-zero, otherwise f32_gen2(); both receive float_div, soft_float32_div and the matching pre/post checks, with NULL for the last two arguments. */ float32 __attribute__((flatten)) +float32_div(float32 a, float32 b, float_status *s) +{ + if (QEMU_HARDFLOAT_2F32_USE_FP) { + return float_gen2(a, b, s, float_div, soft_float32_div, float_div_pre, + float_div_post, NULL, NULL); + } else { + return f32_gen2(a, b, s, float_div, soft_float32_div, f32_div_pre, + f32_div_post, NULL, NULL); + } +} + +/* Public float64 division: calls double_gen2() when QEMU_HARDFLOAT_2F64_USE_FP is non-zero, otherwise f64_gen2(); both receive double_div, soft_float64_div and the matching pre/post checks, with NULL for the last two arguments. */ float64 __attribute__((flatten)) +float64_div(float64 a, float64 b, float_status *s) +{ + if (QEMU_HARDFLOAT_2F64_USE_FP) { + return double_gen2(a, b, s, double_div, soft_float64_div, + double_div_pre, double_div_post, NULL, NULL); + } else { + return f64_gen2(a, b, s, double_div, soft_float64_div, f64_div_pre, + f64_div_post, NULL, NULL); + } +} + /* * Rounds the floating-point value `a' to an integer, and returns the * result as a floating-point value. The operation is performed -- 2.7.4