[Qemu-devel] [PATCH 15/19] Add VSX xmax/xmin Instructions

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Tom Musta <tommusta@gmail.com>
To: QEMU Developers <qemu-devel@nongnu.org>
Cc: Tom Musta <tommusta@gmail.com>,
	"qemu-ppc@nongnu.org" <qemu-ppc@nongnu.org>
Subject: [Qemu-devel] [PATCH 15/19] Add VSX xmax/xmin Instructions
Date: Thu, 24 Oct 2013 11:26:12 -0500	[thread overview]
Message-ID: <52694A24.3050509@gmail.com> (raw)
In-Reply-To: <526947CA.4020504@gmail.com>

This patch adds the VSX floating point maximum and minimum
instructions:

   - xsmaxdp, xvmaxdp, xvmaxsp
   - xsmindp, xvmindp, xvminsp

Because the Power ISA defines maximum and minimum specially for
several boundary cases (for example NaN operands and signed
zeros), the standard softfloat comparison routines (e.g.
float64_lt) cannot be used directly.  Therefore specific routines
for comparing 64 and 32 bit floating point numbers are implemented
in the PowerPC helper code.

Signed-off-by: Tom Musta <tommusta@gmail.com>
---
  target-ppc/fpu_helper.c |  162 +++++++++++++++++++++++++++++++++++++++++++++++
  target-ppc/helper.h     |    6 ++
  target-ppc/translate.c  |   12 ++++
  3 files changed, 180 insertions(+), 0 deletions(-)

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 0373913..29b27ce 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -2323,3 +2323,165 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)                      \

  VSX_SCALAR_CMP(xscmpodp, 1)
  VSX_SCALAR_CMP(xscmpudp, 0)
+
+#define float64_snan_to_qnan(x) ((x) | 0x0008000000000000ULL) /* quiet bit */
+
+/* Total-order compare of two non-NaN float64 bit patterns (-0 < +0).
+ * Returns -1 if a < b, 0 if the encodings are equal, 1 if a > b.
+ * NaN inputs are not treated specially; callers filter them out first.
+ */
+static int compare_float64(float64 a, float64 b)
+{
+    uint64_t asgn = a & 0x8000000000000000ULL;
+    uint64_t bsgn = b & 0x8000000000000000ULL;
+    uint64_t aexp, bexp, afrac, bfrac;
+
+    if (asgn != bsgn) {
+        return asgn ? -1 : 1;
+    }
+
+    /* Same sign: order by magnitude, reversing the result for negatives. */
+    aexp = (a & 0x7FF0000000000000ULL) >> 52;
+    bexp = (b & 0x7FF0000000000000ULL) >> 52;
+    if (aexp != bexp) {
+        return (aexp < bexp) ? (asgn ? 1 : -1) : (asgn ? -1 : 1);
+    }
+
+    afrac = a & 0x000FFFFFFFFFFFFFULL;
+    bfrac = b & 0x000FFFFFFFFFFFFFULL;
+    if (afrac != bfrac) {
+        return (afrac < bfrac) ? (asgn ? 1 : -1) : (asgn ? -1 : 1);
+    }
+
+    return 0;
+}
+
+#define float32_snan_to_qnan(x) ((x) | 0x00400000) /* quiet bit */
+
+/* Total-order compare of two non-NaN float32 bit patterns (-0 < +0).
+ * Returns -1 if a < b, 0 if the encodings are equal, 1 if a > b.
+ * NaN inputs are not treated specially; callers filter them out first.
+ */
+static int compare_float32(float32 a, float32 b)
+{
+    uint32_t asgn = a & 0x80000000;
+    uint32_t bsgn = b & 0x80000000;
+    uint32_t aexp, bexp, afrac, bfrac;
+
+    if (asgn != bsgn) {
+        return asgn ? -1 : 1;
+    }
+
+    /* Exponent is the 8 bits below the sign: mask 0x7F800000, shift 23. */
+    aexp = (a & 0x7F800000) >> 23;
+    bexp = (b & 0x7F800000) >> 23;
+    if (aexp != bexp) {
+        return (aexp < bexp) ? (asgn ? 1 : -1) : (asgn ? -1 : 1);
+    }
+
+    afrac = a & 0x007FFFFF;
+    bfrac = b & 0x007FFFFF;
+    if (afrac != bfrac) {
+        return (afrac < bfrac) ? (asgn ? 1 : -1) : (asgn ? -1 : 1);
+    }
+
+    return 0;
+}
+
+/* VSX_MAX - VSX floating point maximum; expands to helper_<op>()
+ *   op    - instruction mnemonic;  nels - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64);  fld - vsr_t field (f32 or f64)
+ * A signaling NaN (XA checked first) is quieted and reported as VXSNAN;
+ * a lone quiet NaN yields the other operand; two quiet NaNs yield XA.
+ */
+#define VSX_MAX(op, nels, tp, fld)                                            \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                           \
+{                                                                             \
+    ppc_vsr_t xt, xa, xb;                                                     \
+    int i;                                                                    \
+                                                                              \
+    getVSR(xA(opcode), &xa, env);                                             \
+    getVSR(xB(opcode), &xb, env);                                             \
+    getVSR(xT(opcode), &xt, env);                                             \
+                                                                              \
+    for (i = 0; i < nels; i++) {                                              \
+        if (unlikely(tp##_is_any_nan(xa.fld[i]) ||                            \
+                     tp##_is_any_nan(xb.fld[i]))) {                           \
+            if (tp##_is_signaling_nan(xa.fld[i])) {                           \
+                xt.fld[i] = tp##_snan_to_qnan(xa.fld[i]);                     \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);        \
+            } else if (tp##_is_signaling_nan(xb.fld[i])) {                    \
+                xt.fld[i] = tp##_snan_to_qnan(xb.fld[i]);                     \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);        \
+            } else if (tp##_is_quiet_nan(xb.fld[i])) {                        \
+                xt.fld[i] = xa.fld[i];                                        \
+            } else { /* XA is QNaN */                                         \
+                xt.fld[i] = xb.fld[i];                                        \
+            }                                                                 \
+        } else if (unlikely(tp##_is_infinity(xa.fld[i]))) {                   \
+            xt.fld[i] = tp##_is_neg(xa.fld[i]) ? xb.fld[i] : xa.fld[i];       \
+        } else if (unlikely(tp##_is_infinity(xb.fld[i]))) {                   \
+            xt.fld[i] = tp##_is_neg(xb.fld[i]) ? xa.fld[i] : xb.fld[i];       \
+        }                                                                     \
+        else {                                                                \
+            xt.fld[i] = (compare_##tp(xa.fld[i], xb.fld[i]) < 0) ?            \
+                        xb.fld[i] : xa.fld[i];                                \
+        }                                                                     \
+    }                                                                         \
+                                                                              \
+    putVSR(xT(opcode), &xt, env);                                             \
+    helper_float_check_status(env);                                           \
+}
+
+VSX_MAX(xsmaxdp, 1, float64, f64)
+VSX_MAX(xvmaxdp, 2, float64, f64)
+VSX_MAX(xvmaxsp, 4, float32, f32)
+
+/* VSX_MIN - VSX floating point minimum; expands to helper_<op>()
+ *   op    - instruction mnemonic;  nels - number of elements (1, 2 or 4)
+ *   tp    - type (float32 or float64);  fld - vsr_t field (f32 or f64)
+ * A signaling NaN (XA checked first) is quieted and reported as VXSNAN;
+ * a lone quiet NaN yields the other operand; two quiet NaNs yield XA.
+ */
+#define VSX_MIN(op, nels, tp, fld)                                            \
+void helper_##op(CPUPPCState *env, uint32_t opcode)                           \
+{                                                                             \
+    ppc_vsr_t xt, xa, xb;                                                     \
+    int i;                                                                    \
+                                                                              \
+    getVSR(xA(opcode), &xa, env);                                             \
+    getVSR(xB(opcode), &xb, env);                                             \
+    getVSR(xT(opcode), &xt, env);                                             \
+                                                                              \
+    for (i = 0; i < nels; i++) {                                              \
+        if (unlikely(tp##_is_any_nan(xa.fld[i]) ||                            \
+                     tp##_is_any_nan(xb.fld[i]))) {                           \
+            if (tp##_is_signaling_nan(xa.fld[i])) {                           \
+                xt.fld[i] = tp##_snan_to_qnan(xa.fld[i]);                     \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);        \
+            } else if (tp##_is_signaling_nan(xb.fld[i])) {                    \
+                xt.fld[i] = tp##_snan_to_qnan(xb.fld[i]);                     \
+                fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 0);        \
+            } else if (tp##_is_quiet_nan(xb.fld[i])) {                        \
+                xt.fld[i] = xa.fld[i];                                        \
+            } else { /* XA is QNaN */                                         \
+                xt.fld[i] = xb.fld[i];                                        \
+            }                                                                 \
+        } else if (unlikely(tp##_is_infinity(xa.fld[i]))) {                   \
+            xt.fld[i] = tp##_is_neg(xa.fld[i]) ? xa.fld[i] : xb.fld[i];       \
+        } else if (unlikely(tp##_is_infinity(xb.fld[i]))) {                   \
+            xt.fld[i] = tp##_is_neg(xb.fld[i]) ? xb.fld[i] : xa.fld[i];       \
+        }                                                                     \
+        else {                                                                \
+            xt.fld[i] = (compare_##tp(xa.fld[i], xb.fld[i]) < 0) ?            \
+                        xa.fld[i] : xb.fld[i];                                \
+        }                                                                     \
+    }                                                                         \
+                                                                              \
+    putVSR(xT(opcode), &xt, env);                                             \
+    helper_float_check_status(env);                                           \
+}
+
+VSX_MIN(xsmindp, 1, float64, f64)
+VSX_MIN(xvmindp, 2, float64, f64)
+VSX_MIN(xvminsp, 4, float32, f32)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index bfb1964..40c523a 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -270,6 +270,8 @@ DEF_HELPER_2(xsnmsubadp, void, env, i32)
  DEF_HELPER_2(xsnmsubmdp, void, env, i32)
  DEF_HELPER_2(xscmpodp, void, env, i32)
  DEF_HELPER_2(xscmpudp, void, env, i32)
+DEF_HELPER_2(xsmaxdp, void, env, i32)
+DEF_HELPER_2(xsmindp, void, env, i32)

  DEF_HELPER_2(xvadddp, void, env, i32)
  DEF_HELPER_2(xvsubdp, void, env, i32)
@@ -288,6 +290,8 @@ DEF_HELPER_2(xvnmaddadp, void, env, i32)
  DEF_HELPER_2(xvnmaddmdp, void, env, i32)
  DEF_HELPER_2(xvnmsubadp, void, env, i32)
  DEF_HELPER_2(xvnmsubmdp, void, env, i32)
+DEF_HELPER_2(xvmaxdp, void, env, i32)
+DEF_HELPER_2(xvmindp, void, env, i32)

  DEF_HELPER_2(xvaddsp, void, env, i32)
  DEF_HELPER_2(xvsubsp, void, env, i32)
@@ -306,6 +310,8 @@ DEF_HELPER_2(xvnmaddasp, void, env, i32)
  DEF_HELPER_2(xvnmaddmsp, void, env, i32)
  DEF_HELPER_2(xvnmsubasp, void, env, i32)
  DEF_HELPER_2(xvnmsubmsp, void, env, i32)
+DEF_HELPER_2(xvmaxsp, void, env, i32)
+DEF_HELPER_2(xvminsp, void, env, i32)

  DEF_HELPER_2(efscfsi, i32, env, i32)
  DEF_HELPER_2(efscfui, i32, env, i32)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 053df68..67d5267 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7312,6 +7312,8 @@ GEN_VSX_HELPER_2(xsnmsubadp, 0x04, 0x16, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xsnmsubmdp, 0x04, 0x17, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xscmpodp, 0x0C, 0x05, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xscmpudp, 0x0C, 0x04, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmaxdp, 0x00, 0x14, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xsmindp, 0x00, 0x15, 0, PPC2_VSX)

  GEN_VSX_HELPER_2(xvadddp, 0x00, 0x0C, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvsubdp, 0x00, 0x0D, 0, PPC2_VSX)
@@ -7330,6 +7332,8 @@ GEN_VSX_HELPER_2(xvnmaddadp, 0x04, 0x1C, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvnmaddmdp, 0x04, 0x1D, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvnmsubadp, 0x04, 0x1E, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvnmsubmdp, 0x04, 0x1F, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaxdp, 0x00, 0x1C, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmindp, 0x00, 0x1D, 0, PPC2_VSX)

  GEN_VSX_HELPER_2(xvaddsp, 0x00, 0x08, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvsubsp, 0x00, 0x09, 0, PPC2_VSX)
@@ -7348,6 +7352,8 @@ GEN_VSX_HELPER_2(xvnmaddasp, 0x04, 0x18, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvnmaddmsp, 0x04, 0x19, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvnmsubasp, 0x04, 0x1A, 0, PPC2_VSX)
  GEN_VSX_HELPER_2(xvnmsubmsp, 0x04, 0x1B, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvmaxsp, 0x00, 0x18, 0, PPC2_VSX)
+GEN_VSX_HELPER_2(xvminsp, 0x00, 0x19, 0, PPC2_VSX)

  #define VSX_LOGICAL(name, tcg_op)                                    \
  static void glue(gen_, name)(DisasContext * ctx)                     \
@@ -10050,6 +10056,8 @@ GEN_XX3FORM(xsnmsubadp, 0x04, 0x16, PPC2_VSX),
  GEN_XX3FORM(xsnmsubmdp, 0x04, 0x17, PPC2_VSX),
  GEN_XX2FORM(xscmpodp,  0x0C, 0x05, PPC2_VSX),
  GEN_XX2FORM(xscmpudp,  0x0C, 0x04, PPC2_VSX),
+GEN_XX3FORM(xsmaxdp, 0x00, 0x14, PPC2_VSX),
+GEN_XX3FORM(xsmindp, 0x00, 0x15, PPC2_VSX),

  GEN_XX3FORM(xvadddp, 0x00, 0x0C, PPC2_VSX),
  GEN_XX3FORM(xvsubdp, 0x00, 0x0D, PPC2_VSX),
@@ -10068,6 +10076,8 @@ GEN_XX3FORM(xvnmaddadp, 0x04, 0x1C, PPC2_VSX),
  GEN_XX3FORM(xvnmaddmdp, 0x04, 0x1D, PPC2_VSX),
  GEN_XX3FORM(xvnmsubadp, 0x04, 0x1E, PPC2_VSX),
  GEN_XX3FORM(xvnmsubmdp, 0x04, 0x1F, PPC2_VSX),
+GEN_XX3FORM(xvmaxdp, 0x00, 0x1C, PPC2_VSX),
+GEN_XX3FORM(xvmindp, 0x00, 0x1D, PPC2_VSX),

  GEN_XX3FORM(xvaddsp, 0x00, 0x08, PPC2_VSX),
  GEN_XX3FORM(xvsubsp, 0x00, 0x09, PPC2_VSX),
@@ -10086,6 +10096,8 @@ GEN_XX3FORM(xvnmaddasp, 0x04, 0x18, PPC2_VSX),
  GEN_XX3FORM(xvnmaddmsp, 0x04, 0x19, PPC2_VSX),
  GEN_XX3FORM(xvnmsubasp, 0x04, 0x1A, PPC2_VSX),
  GEN_XX3FORM(xvnmsubmsp, 0x04, 0x1B, PPC2_VSX),
+GEN_XX3FORM(xvmaxsp, 0x00, 0x18, PPC2_VSX),
+GEN_XX3FORM(xvminsp, 0x00, 0x19, PPC2_VSX),

  #undef VSX_LOGICAL
  #define VSX_LOGICAL(name, opc2, opc3, fl2) \
-- 
1.7.1

next prev parent reply	other threads:[~2013-10-24 16:26 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-24 16:16 [Qemu-devel] [PATCH 00/19] PowerPC VSX Stage 3 Tom Musta
2013-10-24 16:17 ` [Qemu-devel] [PATCH 01/19] Add New softfloat Routines for VSX Tom Musta
2013-10-24 18:34   ` Richard Henderson
2013-10-25 11:34   ` Alex Bennée
2013-10-25 11:44     ` Peter Maydell
2013-10-25 13:09       ` Alex Bennée
2013-10-25 13:24       ` Tom Musta
2013-10-25 11:55   ` Peter Maydell
2013-10-25 13:01     ` Tom Musta
2013-10-25 13:37       ` Peter Maydell
2013-10-24 16:18 ` [Qemu-devel] [PATCH 02/19] Add set_fprf Argument to fload_invalid_op_excp() Tom Musta
2013-10-24 16:19 ` [Qemu-devel] [PATCH 03/19] General Support for VSX Helpers Tom Musta
2013-10-24 18:51   ` Richard Henderson
2013-10-24 20:42     ` Tom Musta
2013-10-24 21:00       ` Richard Henderson
2013-10-24 16:20 ` [Qemu-devel] [PATCH 04/19] Add VSX ISA2.06 xadd Instructions Tom Musta
2013-10-24 19:44   ` Richard Henderson
2013-10-24 16:20 ` [Qemu-devel] [PATCH 05/19] Add VSX ISA2.06 xsub Instructions Tom Musta
2013-10-24 19:48   ` Richard Henderson
2013-10-24 16:21 ` [Qemu-devel] [PATCH 06/19] Add VSX ISA2.06 xmul Instructions Tom Musta
2013-10-24 20:07   ` Richard Henderson
2013-10-24 16:21 ` [Qemu-devel] [PATCH 07/19] Add VSX ISA2.06 xdiv Instructions Tom Musta
2013-10-24 20:08   ` Richard Henderson
2013-10-24 16:22 ` [Qemu-devel] [PATCH 08/19] Add VSX ISA2.06 xre Instructions Tom Musta
2013-10-24 20:11   ` Richard Henderson
2013-10-24 16:22 ` [Qemu-devel] [PATCH 09/19] Add VSX ISA2.06 xsqrt Instructions Tom Musta
2013-10-24 20:23   ` Richard Henderson
2013-10-24 16:23 ` [Qemu-devel] [PATCH 10/19] Add VSX ISA2.06 xrsqrte Instructions Tom Musta
2013-10-24 20:25   ` Richard Henderson
2013-10-24 16:23 ` [Qemu-devel] [PATCH 11/19] Add VSX ISA2.06 xtdiv Instructions Tom Musta
2013-10-24 20:30   ` Richard Henderson
2013-10-24 16:24 ` [Qemu-devel] [PATCH 12/19] Add VSX ISA2.06 xtsqrt Instructions Tom Musta
2013-10-24 20:34   ` Richard Henderson
2013-10-24 16:25 ` [Qemu-devel] [PATCH 13/19] Add VSX ISA2.06 Multiply Add Instructions Tom Musta
2013-10-24 20:38   ` Richard Henderson
2013-10-25 13:49     ` Tom Musta
2013-10-25 16:25     ` Tom Musta
2013-10-25 16:42       ` Richard Henderson
2013-10-25 17:13         ` Tom Musta
2013-10-25 17:29           ` Richard Henderson
2013-10-25 17:20       ` Peter Maydell
2013-10-25 17:34         ` Richard Henderson
2013-10-24 16:25 ` [Qemu-devel] [PATCH 14/19] Add VSX xscmp*dp Instructions Tom Musta
2013-10-24 20:39   ` Richard Henderson
2013-10-24 16:26 ` Tom Musta [this message]
2013-10-24 20:45   ` [Qemu-devel] [PATCH 15/19] Add VSX xmax/xmin Instructions Richard Henderson
2013-10-24 21:07     ` Tom Musta
2013-10-24 21:18       ` Richard Henderson
2013-10-24 22:10   ` Peter Maydell
2013-10-25 13:52     ` Tom Musta
2013-10-25 13:55       ` Peter Maydell
2013-10-24 16:26 ` [Qemu-devel] [PATCH 16/19] Add VSX Vector Compare Instructions Tom Musta
2013-10-24 16:27 ` [Qemu-devel] [PATCH 17/19] Add VSX Floating Point to Floating Point Conversion Instructions Tom Musta
2013-10-24 20:49   ` Richard Henderson
2013-10-24 16:27 ` [Qemu-devel] [PATCH 18/19] Add VSX ISA2.06 Integer " Tom Musta
2013-10-24 20:51   ` Richard Henderson
2013-10-24 16:28 ` [Qemu-devel] [PATCH 19/19] Add VSX Rounding Instructions Tom Musta
2013-10-24 20:54   ` Richard Henderson

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:0373913 dfblob:29b27ce dfblob:bfb1964 dfblob:40c523a
dfblob:053df68 dfblob:67d5267 )
 OR (
bs:"[Qemu-devel] [PATCH 15/19] Add VSX xmax/xmin Instructions" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52694A24.3050509@gmail.com \
    --to=tommusta@gmail.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.