Re: [Qemu-devel] target-alpha: An approach to fp insn qualifiers

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Richard Henderson <rth@twiddle.net>
To: Laurent Desnogues <laurent.desnogues@gmail.com>
Cc: qemu-devel@nongnu.org
Subject: Re: [Qemu-devel] target-alpha: An approach to fp insn qualifiers
Date: Mon, 14 Dec 2009 16:31:43 -0800	[thread overview]
Message-ID: <4B26D8EF.10801@twiddle.net> (raw)
In-Reply-To: <761ea48b0912141211keb5bbben584d7fe76f44d78c@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 215 bytes --]

On 12/14/2009 12:11 PM, Laurent Desnogues wrote:
> I'll take a closer look at your patch tomorrow.

For the record, I believe this finishes what I had in mind for the
exception handling there in op_helper.c.


r~

[-- Attachment #2: commit-fpu-3 --]
[-- Type: text/plain, Size: 30931 bytes --]

commit ce6c2abc1d5d437dde980b4addc7da0f0f5de252
Author: Richard Henderson <rth@twiddle.net>
Date:   Mon Dec 14 16:27:39 2009 -0800

    target-alpha: Implement arithmetic exceptions for IEEE fp.

diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
index c0dff4b..c1c0470 100644
--- a/target-alpha/cpu.h
+++ b/target-alpha/cpu.h
@@ -430,9 +430,13 @@ enum {
 };
 
 /* Arithmetic exception */
-enum {
-    EXCP_ARITH_OVERFLOW,
-};
+#define EXC_M_IOV	(1<<16)		/* Integer Overflow */
+#define EXC_M_INE	(1<<15)		/* Inexact result */
+#define EXC_M_UNF	(1<<14)		/* Underflow */
+#define EXC_M_FOV	(1<<13)		/* Overflow */
+#define EXC_M_DZE	(1<<12)		/* Division by zero */
+#define EXC_M_INV	(1<<11)		/* Invalid operation */
+#define EXC_M_SWC	(1<<10)		/* Software completion */
 
 enum {
     IR_V0   = 0,
diff --git a/target-alpha/helper.c b/target-alpha/helper.c
index a658f97..a29f785 100644
--- a/target-alpha/helper.c
+++ b/target-alpha/helper.c
@@ -27,41 +27,13 @@
 
 uint64_t cpu_alpha_load_fpcr (CPUState *env)
 {
-    uint64_t ret = 0;
-    int flags, mask;
-
-    flags = env->fp_status.float_exception_flags;
-    ret |= (uint64_t) flags << 52;
-    if (flags)
-        ret |= FPCR_SUM;
-    env->ipr[IPR_EXC_SUM] &= ~0x3E;
-    env->ipr[IPR_EXC_SUM] |= flags << 1;
-
-    mask = env->fp_status.float_exception_mask;
-    if (mask & float_flag_invalid)
-        ret |= FPCR_INVD;
-    if (mask & float_flag_divbyzero)
-        ret |= FPCR_DZED;
-    if (mask & float_flag_overflow)
-        ret |= FPCR_OVFD;
-    if (mask & float_flag_underflow)
-        ret |= FPCR_UNFD;
-    if (mask & float_flag_inexact)
-        ret |= FPCR_INED;
-
-    switch (env->fp_status.float_rounding_mode) {
-    case float_round_nearest_even:
-        ret |= 2ULL << FPCR_DYN_SHIFT;
-        break;
-    case float_round_down:
-        ret |= 1ULL << FPCR_DYN_SHIFT;
-        break;
-    case float_round_up:
-        ret |= 3ULL << FPCR_DYN_SHIFT;
-        break;
-    case float_round_to_zero:
-        break;
-    }
+    uint64_t ret = env->fp_status.float_exception_flags;
+
+    if (ret)
+      ret = FPCR_SUM | (ret << 52);
+
+    ret |= env->fpcr & ~(FPCR_SUM | FPCR_STATUS_MASK);
+
     return ret;
 }
 
@@ -69,6 +41,8 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
 {
     int round_mode, mask;
 
+    env->fpcr = val;
+
     set_float_exception_flags((val >> 52) & 0x3F, &env->fp_status);
 
     mask = 0;
@@ -86,6 +60,7 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
 
     switch ((val >> FPCR_DYN_SHIFT) & 3) {
     case 0:
+    default:
         round_mode = float_round_to_zero;
         break;
     case 1:
@@ -99,6 +74,11 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
         break;
     }
     set_float_rounding_mode(round_mode, &env->fp_status);
+
+    mask = 0;
+    if ((val & (FPCR_UNDZ|FPCR_UNFD)) == (FPCR_UNDZ|FPCR_UNFD))
+        mask = 1;
+    set_flush_to_zero(mask, &env->fp_status);
 }
 
 #if defined(CONFIG_USER_ONLY)
diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
index 3bb0020..d031f56 100644
--- a/target-alpha/op_helper.c
+++ b/target-alpha/op_helper.c
@@ -78,7 +78,7 @@ uint64_t helper_addqv (uint64_t op1, uint64_t op2)
     uint64_t tmp = op1;
     op1 += op2;
     if (unlikely((tmp ^ op2 ^ (-1ULL)) & (tmp ^ op1) & (1ULL << 63))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return op1;
 }
@@ -88,7 +88,7 @@ uint64_t helper_addlv (uint64_t op1, uint64_t op2)
     uint64_t tmp = op1;
     op1 = (uint32_t)(op1 + op2);
     if (unlikely((tmp ^ op2 ^ (-1UL)) & (tmp ^ op1) & (1UL << 31))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return op1;
 }
@@ -98,7 +98,7 @@ uint64_t helper_subqv (uint64_t op1, uint64_t op2)
     uint64_t res;
     res = op1 - op2;
     if (unlikely((op1 ^ op2) & (res ^ op1) & (1ULL << 63))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return res;
 }
@@ -108,7 +108,7 @@ uint64_t helper_sublv (uint64_t op1, uint64_t op2)
     uint32_t res;
     res = op1 - op2;
     if (unlikely((op1 ^ op2) & (res ^ op1) & (1UL << 31))) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return res;
 }
@@ -118,7 +118,7 @@ uint64_t helper_mullv (uint64_t op1, uint64_t op2)
     int64_t res = (int64_t)op1 * (int64_t)op2;
 
     if (unlikely((int32_t)res != res)) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return (int64_t)((int32_t)res);
 }
@@ -130,7 +130,7 @@ uint64_t helper_mulqv (uint64_t op1, uint64_t op2)
     muls64(&tl, &th, op1, op2);
     /* If th != 0 && th != -1, then we had an overflow */
     if (unlikely((th + 1) > 1)) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
     }
     return tl;
 }
@@ -370,87 +370,175 @@ uint64_t helper_unpkbw (uint64_t op1)
 
 /* Floating point helpers */
 
+/* ??? Not implemented is setting EXC_MASK, containing a bitmask of
+   destination registers of instructions that have caused arithmetic
+   traps.  Not needed for userspace emulation, or for complete 
+   emulation of the entire fpu stack within qemu.  But we would need
+   it to invoke a guest kernel's entArith trap handler properly.
+   
+   It would be possible to encode the FP destination register in the
+   QUAL parameter for the FPU helpers below; additional changes would
+   be required for ADD/V et al above.  */
+
+#define QUAL_RM_N	0x080	/* Round mode nearest even */
+#define QUAL_RM_C	0x000	/* Round mode chopped */
+#define QUAL_RM_M	0x040	/* Round mode minus infinity */
+#define QUAL_RM_D	0x0c0	/* Round mode dynamic */
+#define QUAL_RM_MASK	0x0c0
+
+#define QUAL_U		0x100	/* Underflow enable (fp output) */
+#define QUAL_V		0x100	/* Overflow enable (int output) */
+#define QUAL_S		0x400	/* Software completion enable */
+#define QUAL_I		0x200	/* Inexact detection enable */
+
 /* If the floating-point qualifiers specified a rounding mode,
    set that rounding mode and remember the original mode for
    resetting at the end of the instruction.  */
-static inline uint32_t begin_quals_roundmode(uint32_t qual)
+static inline uint32_t begin_fp_roundmode(uint32_t qual)
 {
     uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = rm;
 
-    switch (qual & 0xc0) {
-    case 0x80:
+    switch (qual & QUAL_RM_MASK) {
+    default:
+    case QUAL_RM_N:
         rm = float_round_nearest_even;
         break;
-    case 0x00:
+    case QUAL_RM_C:
         rm = float_round_to_zero;
         break;
-    case 0x40:
+    case QUAL_RM_M:
         rm = float_round_down;
         break;
-    case 0xc0:
+    case QUAL_RM_D:
         return old_rm;
     }
-    set_float_rounding_mode(rm, &FP_STATUS);
+    if (old_rm != rm)
+        set_float_rounding_mode(rm, &FP_STATUS);
     return old_rm;
 }
 
-/* If the floating-point qualifiers specified extra exception processing
-   (i.e. /u or /su), zero the exception flags so that we can determine if
-   the current instruction raises any exceptions.  Save the old acrued
-   exception status so that we can restore them at the end of the insn.  */
-static inline uint32_t begin_quals_exception(uint32_t qual)
+/* Zero the exception flags so that we can determine if the current
+   instruction raises any exceptions.  Save the old accrued exception
+   status so that we can restore it at the end of the insn.  */
+static inline uint32_t begin_fp_exception(void)
 {
-    uint32_t old_exc = 0;
-    if (qual & 0x500) {
-        old_exc = (uint32_t)FP_STATUS.float_exception_flags << 8;
-        set_float_exception_flags(0, &FP_STATUS);
-    }
+    uint32_t old_exc = (uint32_t)FP_STATUS.float_exception_flags << 8;
+    set_float_exception_flags(0, &FP_STATUS);
     return old_exc;
 }
 
+static inline uint32_t begin_fp_flush_to_zero(uint32_t quals)
+{
+    /* If underflow detection is disabled, silently flush to zero.
+       Note that flush-to-zero mode may already be enabled via the FPCR.  */
+    if ((quals & QUAL_U) == 0 && !FP_STATUS.flush_to_zero) {
+        set_flush_to_zero(1, &FP_STATUS);
+        return 0x10000;
+    }
+    return 0;
+}
+
 /* Begin processing an fp operation.  Return a token that should be passed
    when completing the fp operation.  */
-static inline uint32_t begin_fp(uint32_t quals)
+static uint32_t begin_fp(uint32_t quals)
 {
     uint32_t ret = 0;
 
-    ret |= begin_quals_roundmode(quals);
-    ret |= begin_quals_exception(quals);
+    ret |= begin_fp_roundmode(quals);
+    ret |= begin_fp_flush_to_zero(quals);
+    ret |= begin_fp_exception();
 
     return ret;
 }
 
 /* End processing an fp operation.  */
-static inline void end_fp(uint32_t quals, uint32_t orig)
-{
-    uint8_t exc = FP_STATUS.float_exception_flags;
 
-    set_float_exception_flags(exc | (orig >> 8), &FP_STATUS);
-    set_float_rounding_mode(orig & 0xff, &FP_STATUS);
+static inline void end_fp_roundmode(uint32_t orig)
+{
+    uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = orig & 0xff;
+    if (unlikely(rm != old_rm))
+        set_float_rounding_mode(old_rm, &FP_STATUS);
+}
 
-    /* TODO: check quals and exc and raise any exceptions needed.  */
+static inline void end_fp_flush_to_zero(uint32_t orig)
+{
+    if (orig & 0x10000)
+        set_flush_to_zero(0, &FP_STATUS);
 }
 
-/* Raise any exceptions needed for using F, given the insn qualifiers.  */
-static inline void float32_input(uint32_t quals, float32 f)
+static void end_fp_exception(uint32_t quals, uint32_t orig)
 {
-    /* If /s is used, no exceptions are raised immediately.  */
-    /* ??? This for userspace only.  If we are emulating the real hw, then
-       we may well need to trap to the kernel for software emulation.  */
-    /* ??? Shouldn't we raise an exception for SNAN?  */
-    if (quals & 0x500)
-        return;
-    /* TODO: Check for inf, nan, denormal and trap.  */
+    uint8_t exc = FP_STATUS.float_exception_flags;
+
+    /* If inexact detection is disabled, silently clear it.  */
+    if ((quals & QUAL_I) == 0)
+        exc &= ~float_flag_inexact;
+
+    orig = (orig >> 8) & 0xff;
+    set_float_exception_flags(exc | orig, &FP_STATUS);
+
+    /* Raise an exception as required.  */
+    if (unlikely(exc)) {
+        if (quals & QUAL_S)
+            exc &= ~FP_STATUS.float_exception_mask;
+        if (exc) {
+            uint32_t hw_exc = 0;
+
+            if (exc & float_flag_invalid)
+                hw_exc |= EXC_M_INV;
+            if (exc & float_flag_divbyzero)
+                hw_exc |= EXC_M_DZE;
+            if (exc & float_flag_overflow)
+                hw_exc |= EXC_M_FOV;
+            if (exc & float_flag_underflow)
+                hw_exc |= EXC_M_UNF;
+            if (exc & float_flag_inexact)
+                hw_exc |= EXC_M_INE;
+
+            helper_excp(EXCP_ARITH, hw_exc);
+        }
+    }
 }
 
-static inline void float64_input(uint32_t quals, float64 f)
+static void end_fp(uint32_t quals, uint32_t orig)
 {
-    /* TODO: Exactly like above, except for float64.  */
+    end_fp_roundmode(orig);
+    end_fp_flush_to_zero(orig);
+    end_fp_exception(quals, orig);
 }
 
+static uint64_t remap_ieee_input(uint32_t quals, uint64_t a)
+{
+    uint64_t frac;
+    uint32_t exp;
+
+    exp = (uint32_t)(a >> 52) & 0x7ff;
+    frac = a & 0xfffffffffffffull;
+
+    if (exp == 0) {
+        if (frac != 0) {
+            /* If DNZ is set, flush denormals to zero on input.  */
+            if (env->fpcr & FPCR_DNZ)
+                a = a & (1ull << 63);
+            /* If software completion not enabled, trap.  */
+            else if ((quals & QUAL_S) == 0)
+                helper_excp(EXCP_ARITH, EXC_M_UNF);
+        }
+    } else if (exp == 0x7ff) {
+        /* Infinity or NaN.  If software completion is not enabled, trap.
+           If /s is enabled, we'll properly signal for SNaN on output.  */
+        /* ??? I'm not sure these exception bit flags are correct.  I do
+           know that the Linux kernel, at least, doesn't rely on them and
+           just emulates the insn to figure out what exception to use.  */
+        if ((quals & QUAL_S) == 0)
+            helper_excp(EXCP_ARITH, frac ? EXC_M_INV : EXC_M_FOV);
+    }
+
+    return a;
+}
 
 /* F floating (VAX) */
-static inline uint64_t float32_to_f(float32 fa)
+static uint64_t float32_to_f(float32 fa)
 {
     uint64_t r, exp, mant, sig;
     CPU_FloatU a;
@@ -483,7 +571,7 @@ static inline uint64_t float32_to_f(float32 fa)
     return r;
 }
 
-static inline float32 f_to_float32(uint64_t a)
+static float32 f_to_float32(uint64_t a)
 {
     uint32_t exp, mant_sig;
     CPU_FloatU r;
@@ -535,8 +623,6 @@ uint64_t helper_addf (uint64_t a, uint64_t b, uint32_t quals)
     fb = f_to_float32(b);
 
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
     fr = float32_add(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -552,8 +638,6 @@ uint64_t helper_subf (uint64_t a, uint64_t b, uint32_t quals)
     fb = f_to_float32(b);
 
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
     fr = float32_sub(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -569,8 +653,6 @@ uint64_t helper_mulf (uint64_t a, uint64_t b, uint32_t quals)
     fb = f_to_float32(b);
 
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
     fr = float32_mul(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -586,8 +668,6 @@ uint64_t helper_divf (uint64_t a, uint64_t b, uint32_t quals)
     fb = f_to_float32(b);
 
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
     fr = float32_div(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -602,7 +682,6 @@ uint64_t helper_sqrtf (uint64_t t, uint32_t quals)
     ft = f_to_float32(t);
 
     token = begin_fp(quals);
-    float32_input(quals, ft);
     fr = float32_sqrt(ft, &FP_STATUS);
     end_fp(quals, token);
 
@@ -611,7 +690,7 @@ uint64_t helper_sqrtf (uint64_t t, uint32_t quals)
 
 
 /* G floating (VAX) */
-static inline uint64_t float64_to_g(float64 fa)
+static uint64_t float64_to_g(float64 fa)
 {
     uint64_t r, exp, mant, sig;
     CPU_DoubleU a;
@@ -644,7 +723,7 @@ static inline uint64_t float64_to_g(float64 fa)
     return r;
 }
 
-static inline float64 g_to_float64(uint64_t a)
+static float64 g_to_float64(uint64_t a)
 {
     uint64_t exp, mant_sig;
     CPU_DoubleU r;
@@ -696,8 +775,6 @@ uint64_t helper_addg (uint64_t a, uint64_t b, uint32_t quals)
     fb = g_to_float64(b);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
     fr = float64_add(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -713,8 +790,6 @@ uint64_t helper_subg (uint64_t a, uint64_t b, uint32_t quals)
     fb = g_to_float64(b);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
     fr = float64_sub(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -730,8 +805,6 @@ uint64_t helper_mulg (uint64_t a, uint64_t b, uint32_t quals)
     fb = g_to_float64(b);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
     fr = float64_mul(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -747,8 +820,6 @@ uint64_t helper_divg (uint64_t a, uint64_t b, uint32_t quals)
     fb = g_to_float64(b);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
     fr = float64_div(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -763,7 +834,6 @@ uint64_t helper_sqrtg (uint64_t a, uint32_t quals)
     fa = g_to_float64(a);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
     fr = float64_sqrt(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -774,7 +844,7 @@ uint64_t helper_sqrtg (uint64_t a, uint32_t quals)
 /* S floating (single) */
 
 /* Taken from linux/arch/alpha/kernel/traps.c, s_mem_to_reg.  */
-static inline uint64_t float32_to_s_int(uint32_t fi)
+static uint64_t float32_to_s_int(uint32_t fi)
 {
     uint32_t frac = fi & 0x7fffff;
     uint32_t sign = fi >> 31;
@@ -796,7 +866,7 @@ static inline uint64_t float32_to_s_int(uint32_t fi)
             | ((uint64_t)frac << 29));
 }
 
-static inline uint64_t float32_to_s(float32 fa)
+static uint64_t float32_to_s(float32 fa)
 {
     CPU_FloatU a;
     a.f = fa;
@@ -825,17 +895,19 @@ uint64_t helper_memory_to_s (uint32_t a)
     return float32_to_s_int(a);
 }
 
+static float32 input_s(uint32_t quals, uint64_t a)
+{
+    return s_to_float32(remap_ieee_input(quals, a));
+}
+
 uint64_t helper_adds (uint64_t a, uint64_t b, uint32_t quals)
 {
     float32 fa, fb, fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_add(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -847,12 +919,9 @@ uint64_t helper_subs (uint64_t a, uint64_t b, uint32_t quals)
     float32 fa, fb, fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_sub(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -864,12 +933,9 @@ uint64_t helper_muls (uint64_t a, uint64_t b, uint32_t quals)
     float32 fa, fb, fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_mul(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -881,12 +947,9 @@ uint64_t helper_divs (uint64_t a, uint64_t b, uint32_t quals)
     float32 fa, fb, fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-    fb = s_to_float32(b);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
-    float32_input(quals, fb);
+    fa = input_s(quals, a);
+    fb = input_s(quals, b);
     fr = float32_div(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -898,10 +961,8 @@ uint64_t helper_sqrts (uint64_t a, uint32_t quals)
     float32 fa, fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
+    fa = input_s(quals, a);
     fr = float32_sqrt(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -926,17 +987,20 @@ static inline uint64_t float64_to_t(float64 fa)
     return r.ll;
 }
 
+/* Raise any exceptions needed for using A, given the insn qualifiers.  */
+static float64 input_t(uint32_t quals, uint64_t a)
+{
+    return t_to_float64(remap_ieee_input(quals, a));
+}
+
 uint64_t helper_addt (uint64_t a, uint64_t b, uint32_t quals)
 {
     float64 fa, fb, fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_add(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -948,12 +1012,9 @@ uint64_t helper_subt (uint64_t a, uint64_t b, uint32_t quals)
     float64 fa, fb, fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_sub(fa, fb, &FP_STATUS);
     end_fp(quals, token);
     
@@ -965,12 +1026,9 @@ uint64_t helper_mult (uint64_t a, uint64_t b, uint32_t quals)
     float64 fa, fb, fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_mul(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -982,12 +1040,9 @@ uint64_t helper_divt (uint64_t a, uint64_t b, uint32_t quals)
     float64 fa, fb, fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-    fb = t_to_float64(b);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
-    float64_input(quals, fb);
+    fa = input_t(quals, a);
+    fb = input_t(quals, b);
     fr = float64_div(fa, fb, &FP_STATUS);
     end_fp(quals, token);
 
@@ -999,10 +1054,8 @@ uint64_t helper_sqrtt (uint64_t a, uint32_t quals)
     float64 fa, fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
+    fa = input_t(quals, a);
     fr = float64_sqrt(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -1028,6 +1081,8 @@ uint64_t helper_cpyse(uint64_t a, uint64_t b)
 
 
 /* Comparisons */
+/* ??? Software completion qualifier missing.  */
+
 uint64_t helper_cmptun (uint64_t a, uint64_t b)
 {
     float64 fa, fb;
@@ -1126,10 +1181,8 @@ uint64_t helper_cvtts (uint64_t a, uint32_t quals)
     float32 fr;
     uint32_t token;
 
-    fa = t_to_float64(a);
-
     token = begin_fp(quals);
-    float64_input(quals, fa);
+    fa = input_t(quals, a);
     fr = float64_to_float32(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -1142,10 +1195,8 @@ uint64_t helper_cvtst (uint64_t a, uint32_t quals)
     float64 fr;
     uint32_t token;
 
-    fa = s_to_float32(a);
-
     token = begin_fp(quals);
-    float32_input(quals, fa);
+    fa = input_s(quals, a);
     fr = float32_to_float64(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -1164,115 +1215,125 @@ uint64_t helper_cvtqs (uint64_t a, uint32_t quals)
     return float32_to_s(fr);
 }
 
-uint64_t helper_cvttq (uint64_t a, uint32_t quals)
-{
-    uint64_t ret, frac;
-    uint32_t token, exp, sign, exc = 0;
-
-    token = begin_fp(quals);
+/* Implement float64 to uint64 conversion without overflow enabled.
+   In this mode we must supply the truncated result.  This behaviour
+   is used by the compiler to get unsigned conversion for free with
+   the same instruction.  */
 
-    /* Alpha integer conversion does not saturate, as the generic routine
-       does.  Instead it supplies a truncated result.  This fact is relied
-       upon by GCC in that without overflow enabled we can get unsigned
-       conversion for free with the same instruction.  */
+static uint64_t cvttq_noqual_internal(uint64_t a, uint32_t rounding_mode)
+{
+    uint64_t frac, ret = 0;
+    uint32_t exp, sign;
+    int shift;
 
     sign = (a >> 63);
     exp = (uint32_t)(a >> 52) & 0x7ff;
     frac = a & 0xfffffffffffffull;
 
-    if (exp == 0) {
-        ret = 0;
-        if (frac != 0) {
-            /* ??? If DNZ set, map to zero without trapping.  */
-            /* ??? Figure out what kind of exception signal to send.  */
-            if (!(quals & 0x400))
-                helper_excp(EXCP_ARITH, 0);
-            goto do_underflow;
-        }
-    } else if (exp == 0x7ff) {
-        /* In keeping with the truncation result, both infinity and NaN
-           give result of zero.  See Table B-2 in the Alpha Architecture
-           Handbook.  */
-        ret = 0;
-        exc = float_flag_invalid;
+    /* We already handled denormals in remap_ieee_input; infinities and
+       nans are defined to return zero as per truncation.  */
+    if (exp == 0 || exp == 0x7ff)
+        return 0;
 
-        /* Without /s qualifier, both Inf and NaN trap.  SNaN always traps. */
-        if (!(quals & 0x400) || (frac & 0x4000000000000ull))
-            helper_excp(EXCP_ARITH, 0);
+    /* Restore implicit bit.  */
+    frac |= 0x10000000000000ull;
+
+    /* Note that neither overflow exceptions nor inexact exceptions
+       are desired.  This lets us streamline the checks quite a bit.  */
+    shift = exp - 1023 - 52;
+    if (shift >= 0) {
+        /* In this case the number is so large that we must shift
+           the fraction left.  There is no rounding to do.  */
+        if (shift < 63) {
+            ret = frac << shift;
+        }
     } else {
-        int32_t shift;
-
-        /* Restore implicit bit.  */
-        frac |= 0x10000000000000ull;
-
-        shift = exp - 1023 - 52;
-        if (shift > 0) {
-            /* In this case the number is so large that we must shift
-               the fraction left.  There is no rounding to do, but we
-               must still set inexact for overflow.  */
-            if (shift < 63) {
-                ret = frac << shift;
-                if ((ret >> shift) != frac)
-                    exc = float_flag_inexact;
-            } else {
-                exc = float_flag_inexact;
-                ret = 0;
-            }
-        } else if (shift == 0) {
-            /* The exponent is exactly right for the 52-bit fraction.  */
-            ret = frac;
+        uint64_t round;
+
+        /* In this case the number is smaller than the fraction as
+           represented by the 52 bit number.  Here we must think 
+           about rounding the result.  Handle this by shifting the
+           fractional part of the number into the high bits of ROUND.
+           This will let us efficiently handle round-to-nearest.  */
+        shift = -shift;
+        if (shift < 63) {
+            ret = frac >> shift;
+            round = frac << (64 - shift);
         } else {
-            uint64_t round;
-
-            /* In this case the number is smaller than the fraction as
-               represented by the 52 bit number.  Here we must think 
-               about rounding the result.  Handle this by shifting the
-               fractional part of the number into the high bits of ROUND.
-               This will let us efficiently handle round-to-nearest.  */
-            shift = -shift;
-            if (shift < 63) {
-                ret = frac >> shift;
-                round = frac << (64 - shift);
-            } else {
-            do_underflow:
-                /* The exponent is so small we shift out everything.  */
-                ret = 0;
-                round = 1;
-            }
+            /* The exponent is so small we shift out everything.
+               Leave a sticky bit for proper rounding below.  */
+            round = 1;
+        }
 
-            if (round) {
-                exc = float_flag_inexact;
-                switch (FP_STATUS.float_rounding_mode) {
-                case float_round_nearest_even:
-                    if (round == (1ull << 63)) {
-                        /* The remaining fraction is exactly 0.5;
-                           round to even.  */
-                        ret += (ret & 1);
-                    } else if (round > (1ull << 63)) {
-                        ret += 1;
-                    }
-                    break;
-                case float_round_to_zero:
-                    break;
-                case float_round_up:
-                    if (!sign)
-                        ret += 1;
-                    break;
-                case float_round_down:
-                    if (sign)
-                        ret += 1;
-                    break;
+        if (round) {
+            switch (rounding_mode) {
+            case float_round_nearest_even:
+                if (round == (1ull << 63)) {
+                    /* Remaining fraction is exactly 0.5; round to even.  */
+                    ret += (ret & 1);
+                } else if (round > (1ull << 63)) {
+                    ret += 1;
                 }
+                break;
+            case float_round_to_zero:
+                break;
+            case float_round_up:
+                if (!sign)
+                    ret += 1;
+                break;
+            case float_round_down:
+                if (sign)
+                    ret += 1;
+                break;
             }
         }
-
-        if (sign)
-            ret = -ret;
     }
 
-    if (exc)
-        float_raise(exc, &FP_STATUS);
-    end_fp(quals, token);
+    if (sign)
+        ret = -ret;
+    return ret;
+}
+
+uint64_t helper_cvttq (uint64_t a, uint32_t quals)
+{
+    uint64_t ret;
+
+    a = remap_ieee_input(quals, a);
+
+    if (quals & QUAL_V) {
+        float64 fa = t_to_float64(a);
+        uint32_t token;
+
+        token = begin_fp_exception();
+        if ((quals & QUAL_RM_MASK) == QUAL_RM_C) {
+            ret = float64_to_int64_round_to_zero(fa, &FP_STATUS);
+        } else {
+            token |= begin_fp_roundmode(quals);
+            ret = float64_to_int64(fa, &FP_STATUS);
+            end_fp_roundmode(token);
+        }
+        end_fp_exception(quals, token);
+    } else {
+        uint32_t round_mode;
+
+        switch (quals & QUAL_RM_MASK) {
+        case QUAL_RM_N:
+            round_mode = float_round_nearest_even;
+            break;
+        case QUAL_RM_C:
+        default:
+            round_mode = float_round_to_zero;
+            break;
+        case QUAL_RM_M:
+            round_mode = float_round_down;
+            break;
+        case QUAL_RM_D:
+            round_mode = FP_STATUS.float_rounding_mode;
+            break;
+        }
+
+        ret = cvttq_noqual_internal(a, round_mode);
+    }
 
     return ret;
 }
@@ -1310,7 +1371,6 @@ uint64_t helper_cvtgf (uint64_t a, uint32_t quals)
     fa = g_to_float64(a);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
     fr = float64_to_float32(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -1326,7 +1386,6 @@ uint64_t helper_cvtgq (uint64_t a, uint32_t quals)
     fa = g_to_float64(a);
 
     token = begin_fp(quals);
-    float64_input(quals, fa);
     ret = float64_to_int64(fa, &FP_STATUS);
     end_fp(quals, token);
 
@@ -1352,35 +1411,24 @@ uint64_t helper_cvtlq (uint64_t a)
     return (lo & 0x3FFFFFFF) | (hi & 0xc0000000);
 }
 
-static inline uint64_t __helper_cvtql(uint64_t a, int s, int v)
-{
-    uint64_t r;
-
-    r = ((uint64_t)(a & 0xC0000000)) << 32;
-    r |= ((uint64_t)(a & 0x7FFFFFFF)) << 29;
-
-    if (v && (int64_t)((int32_t)r) != (int64_t)r) {
-        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
-    }
-    if (s) {
-        /* TODO */
-    }
-    return r;
-}
-
 uint64_t helper_cvtql (uint64_t a)
 {
-    return __helper_cvtql(a, 0, 0);
+    return ((a & 0xC0000000) << 32) | ((a & 0x7FFFFFFF) << 29);
 }
 
 uint64_t helper_cvtqlv (uint64_t a)
 {
-    return __helper_cvtql(a, 0, 1);
+    if ((int32_t)a != (int64_t)a)
+        helper_excp(EXCP_ARITH, EXC_M_IOV);
+    return helper_cvtql(a);
 }
 
 uint64_t helper_cvtqlsv (uint64_t a)
 {
-    return __helper_cvtql(a, 1, 1);
+    /* ??? I'm pretty sure there's nothing that /sv needs to do that /v
+       doesn't do.  The only thing I can think is that /sv is a valid
+       instruction merely for completeness in the ISA.  */
+    return helper_cvtqlv(a);
 }
 
 /* PALcode support special instructions */

next prev parent reply	other threads:[~2009-12-15  0:31 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-12-14 18:02 [Qemu-devel] target-alpha: An approach to fp insn qualifiers Richard Henderson
2009-12-14 20:11 ` Laurent Desnogues
2009-12-14 22:21   ` Richard Henderson
2009-12-15  0:31   ` Richard Henderson [this message]
2009-12-15  3:50     ` Richard Henderson
2009-12-15 11:31       ` Laurent Desnogues
2009-12-15 16:17         ` Richard Henderson
2009-12-15 17:32       ` Vince Weaver
2009-12-17 17:18       ` Vince Weaver

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:c0dff4b dfblob:c1c0470 dfblob:a658f97 dfblob:a29f785
dfblob:3bb0020 dfblob:d031f56 )
 OR (
bs:"Re: [Qemu-devel] target-alpha: An approach to fp insn qualifiers" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4B26D8EF.10801@twiddle.net \
    --to=rth@twiddle.net \
    --cc=laurent.desnogues@gmail.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.