From: Joseph Myers <joseph@codesourcery.com>
To: <qemu-devel@nongnu.org>, <aurelien@aurel32.net>,
<peter.maydell@linaro.org>, <alex.bennee@linaro.org>,
<laurent@vivier.eu>, <pbonzini@redhat.com>, <rth@twiddle.net>,
<ehabkost@redhat.com>
Subject: [PATCH v2 6/6] target/i386: reimplement fprem, fprem1 using floatx80 operations
Date: Mon, 8 Jun 2020 16:58:23 +0000 [thread overview]
Message-ID: <alpine.DEB.2.21.2006081657200.23637@digraph.polyomino.org.uk> (raw)
In-Reply-To: <alpine.DEB.2.21.2006081653080.23637@digraph.polyomino.org.uk>
The x87 fprem and fprem1 emulation is currently based around
conversion to double, which is inherently unsuitable for a good
emulation of any floatx80 operation. Reimplement using the soft-float
floatx80 remainder operations.
Signed-off-by: Joseph Myers <joseph@codesourcery.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
---
target/i386/fpu_helper.c | 156 ++++++++++++---------------------------
1 file changed, 48 insertions(+), 108 deletions(-)
diff --git a/target/i386/fpu_helper.c b/target/i386/fpu_helper.c
index 8ef5b463ea..0e531e3821 100644
--- a/target/i386/fpu_helper.c
+++ b/target/i386/fpu_helper.c
@@ -934,124 +934,64 @@ void helper_fxtract(CPUX86State *env)
merge_exception_flags(env, old_flags);
}
-void helper_fprem1(CPUX86State *env)
+static void helper_fprem_common(CPUX86State *env, bool mod)
{
- double st0, st1, dblq, fpsrcop, fptemp;
- CPU_LDoubleU fpsrcop1, fptemp1;
- int expdif;
- signed long long int q;
-
- st0 = floatx80_to_double(env, ST0);
- st1 = floatx80_to_double(env, ST1);
-
- if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
- ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
- env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
- return;
- }
-
- fpsrcop = st0;
- fptemp = st1;
- fpsrcop1.d = ST0;
- fptemp1.d = ST1;
- expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
-
- if (expdif < 0) {
- /* optimisation? taken from the AMD docs */
- env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
- /* ST0 is unchanged */
- return;
- }
+ uint8_t old_flags = save_exception_flags(env);
+ uint64_t quotient;
+ CPU_LDoubleU temp0, temp1;
+ int exp0, exp1, expdiff;
- if (expdif < 53) {
- dblq = fpsrcop / fptemp;
- /* round dblq towards nearest integer */
- dblq = rint(dblq);
- st0 = fpsrcop - fptemp * dblq;
+ temp0.d = ST0;
+ temp1.d = ST1;
+ exp0 = EXPD(temp0);
+ exp1 = EXPD(temp1);
- /* convert dblq to q by truncating towards zero */
- if (dblq < 0.0) {
- q = (signed long long int)(-dblq);
+ env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
+ if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) ||
+ exp0 == 0x7fff || exp1 == 0x7fff ||
+ floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) {
+ ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
+ } else {
+ if (exp0 == 0) {
+ exp0 = 1 - clz64(temp0.l.lower);
+ }
+ if (exp1 == 0) {
+ exp1 = 1 - clz64(temp1.l.lower);
+ }
+ expdiff = exp0 - exp1;
+ if (expdiff < 64) {
+ ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status);
+ env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */
+ env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */
+ env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */
} else {
- q = (signed long long int)dblq;
+ /*
+ * Partial remainder. This choice of how many bits to
+ * process at once is specified in AMD instruction set
+ * manuals, and empirically is followed by Intel
+ * processors as well; it ensures that the final remainder
+ * operation in a loop does produce the correct low three
+ * bits of the quotient. AMD manuals specify that the
+ * flags other than C2 are cleared, and empirically Intel
+ * processors clear them as well.
+ */
+ int n = 32 + (expdiff % 32);
+ temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status);
+ ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status);
+ env->fpus |= 0x400; /* C2 <-- 1 */
}
-
- env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
- /* (C0,C3,C1) <-- (q2,q1,q0) */
- env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
- env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
- env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
- } else {
- env->fpus |= 0x400; /* C2 <-- 1 */
- fptemp = pow(2.0, expdif - 50);
- fpsrcop = (st0 / st1) / fptemp;
- /* fpsrcop = integer obtained by chopping */
- fpsrcop = (fpsrcop < 0.0) ?
- -(floor(fabs(fpsrcop))) : floor(fpsrcop);
- st0 -= (st1 * fpsrcop * fptemp);
}
- ST0 = double_to_floatx80(env, st0);
+ merge_exception_flags(env, old_flags);
}
-void helper_fprem(CPUX86State *env)
+void helper_fprem1(CPUX86State *env)
{
- double st0, st1, dblq, fpsrcop, fptemp;
- CPU_LDoubleU fpsrcop1, fptemp1;
- int expdif;
- signed long long int q;
-
- st0 = floatx80_to_double(env, ST0);
- st1 = floatx80_to_double(env, ST1);
-
- if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
- ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
- env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
- return;
- }
-
- fpsrcop = st0;
- fptemp = st1;
- fpsrcop1.d = ST0;
- fptemp1.d = ST1;
- expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
-
- if (expdif < 0) {
- /* optimisation? taken from the AMD docs */
- env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
- /* ST0 is unchanged */
- return;
- }
-
- if (expdif < 53) {
- dblq = fpsrcop / fptemp; /* ST0 / ST1 */
- /* round dblq towards zero */
- dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
- st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
-
- /* convert dblq to q by truncating towards zero */
- if (dblq < 0.0) {
- q = (signed long long int)(-dblq);
- } else {
- q = (signed long long int)dblq;
- }
-
- env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
- /* (C0,C3,C1) <-- (q2,q1,q0) */
- env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
- env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
- env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
- } else {
- int N = 32 + (expdif % 32); /* as per AMD docs */
+ helper_fprem_common(env, false);
+}
- env->fpus |= 0x400; /* C2 <-- 1 */
- fptemp = pow(2.0, (double)(expdif - N));
- fpsrcop = (st0 / st1) / fptemp;
- /* fpsrcop = integer obtained by chopping */
- fpsrcop = (fpsrcop < 0.0) ?
- -(floor(fabs(fpsrcop))) : floor(fpsrcop);
- st0 -= (st1 * fpsrcop * fptemp);
- }
- ST0 = double_to_floatx80(env, st0);
+void helper_fprem(CPUX86State *env)
+{
+ helper_fprem_common(env, true);
}
void helper_fyl2xp1(CPUX86State *env)
--
2.17.1
--
Joseph S. Myers
joseph@codesourcery.com
next prev parent reply other threads:[~2020-06-08 17:06 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-06-08 16:54 [PATCH v2 0/6] softfloat, target/i386: fprem, fprem1 fixes Joseph Myers
2020-06-08 16:55 ` [PATCH v2 1/6] softfloat: merge floatx80_mod and floatx80_rem Joseph Myers
2020-06-08 16:55 ` [PATCH v2 2/6] softfloat: fix floatx80 remainder pseudo-denormal check for zero Joseph Myers
2020-06-08 16:56 ` [PATCH v2 3/6] softfloat: do not return pseudo-denormal from floatx80 remainder Joseph Myers
2020-06-08 16:56 ` [PATCH v2 4/6] softfloat: do not set denominator high bit for " Joseph Myers
2020-06-08 16:57 ` [PATCH v2 5/6] softfloat: return low bits of quotient from floatx80_modrem Joseph Myers
2020-06-08 16:58 ` Joseph Myers [this message]
2020-06-12 16:41 ` [PATCH v2 0/6] softfloat, target/i386: fprem, fprem1 fixes Paolo Bonzini
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=alpine.DEB.2.21.2006081657200.23637@digraph.polyomino.org.uk \
--to=joseph@codesourcery.com \
--cc=alex.bennee@linaro.org \
--cc=aurelien@aurel32.net \
--cc=ehabkost@redhat.com \
--cc=laurent@vivier.eu \
--cc=pbonzini@redhat.com \
--cc=peter.maydell@linaro.org \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).