[PATCH v1 16/20] s390x/tcg: Implement 32/128bit for VECTOR FP MULTIPLY AND (ADD|SUBTRACT)

All of lore.kernel.org
 help / color / mirror / Atom feed

From: David Hildenbrand <david@redhat.com>
To: qemu-devel@nongnu.org
Cc: qemu-s390x@nongnu.org, Cornelia Huck <cohuck@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Thomas Huth <thuth@redhat.com>,
	David Hildenbrand <david@redhat.com>
Subject: [PATCH v1 16/20] s390x/tcg: Implement 32/128bit for VECTOR FP MULTIPLY AND (ADD|SUBTRACT)
Date: Wed, 30 Sep 2020 16:55:19 +0200	[thread overview]
Message-ID: <20200930145523.71087-17-david@redhat.com> (raw)
In-Reply-To: <20200930145523.71087-1-david@redhat.com>

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  6 ++
 target/s390x/translate_vx.c.inc | 48 +++++++++++++---
 target/s390x/vec_fpu_helper.c   | 98 ++++++++++++++++++++-------------
 3 files changed, 107 insertions(+), 45 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index c2ded83669..e4d60299dc 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -317,10 +317,16 @@ DEF_HELPER_FLAGS_5(gvec_vfm32s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfma32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfma32s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfma64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfma128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfms32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfms32s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64s, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_6(gvec_vfms128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq32s, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 6bd599b319..5d31498cc1 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2847,18 +2847,52 @@ static DisasJumpType op_vfma(DisasContext *s, DisasOps *o)
     const uint8_t m5 = get_field(s, m5);
     const uint8_t fpf = get_field(s, m6);
     const bool se = extract32(m5, 3, 1);
-    gen_helper_gvec_4_ptr *fn;
+    gen_helper_gvec_4_ptr *fn = NULL;
 
-    if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
+    switch (s->fields.op2) {
+    case 0x8f:
+        switch (fpf) {
+        case FPF_SHORT:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = se ? gen_helper_gvec_vfma32s : gen_helper_gvec_vfma32;
+            }
+            break;
+        case FPF_LONG:
+            fn = se ? gen_helper_gvec_vfma64s : gen_helper_gvec_vfma64;
+            break;
+        default:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = gen_helper_gvec_vfma128;
+            }
+            break;
+        }
+        break;
+    case 0x8e:
+        switch (fpf) {
+        case FPF_SHORT:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = se ? gen_helper_gvec_vfms32s : gen_helper_gvec_vfms32;
+            }
+            break;
+        case FPF_LONG:
+            fn = se ? gen_helper_gvec_vfms64s : gen_helper_gvec_vfms64;
+            break;
+        default:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = gen_helper_gvec_vfms128;
+            }
+            break;
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    if (!fn || extract32(m5, 0, 3)) {
         gen_program_exception(s, PGM_SPECIFICATION);
         return DISAS_NORETURN;
     }
 
-    if (s->fields.op2 == 0x8f) {
-        fn = se ? gen_helper_gvec_vfma64s : gen_helper_gvec_vfma64;
-    } else {
-        fn = se ? gen_helper_gvec_vfms64s : gen_helper_gvec_vfms64;
-    }
     gen_gvec_4_ptr(get_field(s, v1), get_field(s, v2),
                    get_field(s, v3), get_field(s, v4), cpu_env,
                    0, fn);
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index f18f0ae8e2..0b25718365 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -699,53 +699,75 @@ void HELPER(gvec_vfm##BITS##s)(void *v1, const void *v2, const void *v3,       \
 DEF_GVEC_FVM_S(32)
 DEF_GVEC_FVM_S(64)
 
-static void vfma64(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
-                   const S390Vector *v4, CPUS390XState *env, bool s, int flags,
-                   uintptr_t retaddr)
-{
-    uint8_t vxc, vec_exc = 0;
-    S390Vector tmp = {};
-    int i;
-
-    for (i = 0; i < 2; i++) {
-        const uint64_t a = s390_vec_read_element64(v2, i);
-        const uint64_t b = s390_vec_read_element64(v3, i);
-        const uint64_t c = s390_vec_read_element64(v4, i);
-        uint64_t ret = float64_muladd(a, b, c, flags, &env->fpu_status);
-
-        s390_vec_write_element64(&tmp, i, ret);
-        vxc = check_ieee_exc(env, i, false, &vec_exc);
-        if (s || vxc) {
-            break;
-        }
-    }
-    handle_ieee_exc(env, vxc, vec_exc, retaddr);
-    *v1 = tmp;
+#define DEF_VFMA(BITS)                                                         \
+static void vfma##BITS(S390Vector *v1, const S390Vector *v2,                   \
+                       const S390Vector *v3, const S390Vector *v4,             \
+                       CPUS390XState *env, bool s, int flags,                  \
+                       uintptr_t retaddr)                                      \
+{                                                                              \
+    uint8_t vxc, vec_exc = 0;                                                  \
+    S390Vector tmp = {};                                                       \
+    int i;                                                                     \
+                                                                               \
+    for (i = 0; i < (128 / BITS); i++) {                                       \
+        const float##BITS a = s390_vec_read_float##BITS(v2, i);                \
+        const float##BITS b = s390_vec_read_float##BITS(v3, i);                \
+        const float##BITS c = s390_vec_read_float##BITS(v4, i);                \
+        float##BITS ret = float##BITS##_muladd(a, b, c, flags,                 \
+                                               &env->fpu_status);              \
+                                                                               \
+        s390_vec_write_float##BITS(&tmp, i, ret);                              \
+        vxc = check_ieee_exc(env, i, false, &vec_exc);                         \
+        if (s || vxc) {                                                        \
+            break;                                                             \
+        }                                                                      \
+    }                                                                          \
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);                               \
+    *v1 = tmp;                                                                 \
 }
+DEF_VFMA(32)
+DEF_VFMA(64)
+DEF_VFMA(128)
 
-void HELPER(gvec_vfma64)(void *v1, const void *v2, const void *v3,
-                         const void *v4, CPUS390XState *env, uint32_t desc)
-{
-    vfma64(v1, v2, v3, v4, env, false, 0, GETPC());
+#define DEF_GVEC_VFMA(BITS)                                                    \
+void HELPER(gvec_vfma##BITS)(void *v1, const void *v2, const void *v3,         \
+                             const void *v4, CPUS390XState *env, uint32_t desc)\
+{                                                                              \
+    vfma##BITS(v1, v2, v3, v4, env, false, 0, GETPC());                        \
 }
+DEF_GVEC_VFMA(32)
+DEF_GVEC_VFMA(64)
+DEF_GVEC_VFMA(128)
 
-void HELPER(gvec_vfma64s)(void *v1, const void *v2, const void *v3,
-                         const void *v4, CPUS390XState *env, uint32_t desc)
-{
-    vfma64(v1, v2, v3, v4, env, true, 0, GETPC());
+#define DEF_GVEC_VFMA_S(BITS)                                                  \
+void HELPER(gvec_vfma##BITS##s)(void *v1, const void *v2, const void *v3,      \
+                                const void *v4, CPUS390XState *env,            \
+                                uint32_t desc)                                 \
+{                                                                              \
+    vfma##BITS(v1, v2, v3, v4, env, true, 0, GETPC());                         \
 }
+DEF_GVEC_VFMA_S(32)
+DEF_GVEC_VFMA_S(64)
 
-void HELPER(gvec_vfms64)(void *v1, const void *v2, const void *v3,
-                         const void *v4, CPUS390XState *env, uint32_t desc)
-{
-    vfma64(v1, v2, v3, v4, env, false, float_muladd_negate_c, GETPC());
+#define DEF_GVEC_VFMS(BITS)                                                    \
+void HELPER(gvec_vfms##BITS)(void *v1, const void *v2, const void *v3,         \
+                             const void *v4, CPUS390XState *env, uint32_t desc)\
+{                                                                              \
+    vfma##BITS(v1, v2, v3, v4, env, false, float_muladd_negate_c, GETPC());    \
 }
+DEF_GVEC_VFMS(32)
+DEF_GVEC_VFMS(64)
+DEF_GVEC_VFMS(128)
 
-void HELPER(gvec_vfms64s)(void *v1, const void *v2, const void *v3,
-                         const void *v4, CPUS390XState *env, uint32_t desc)
-{
-    vfma64(v1, v2, v3, v4, env, true, float_muladd_negate_c, GETPC());
+#define DEF_GVEC_VFMS_S(BITS)                                                  \
+void HELPER(gvec_vfms##BITS##s)(void *v1, const void *v2, const void *v3,      \
+                                const void *v4, CPUS390XState *env,            \
+                                uint32_t desc)                                 \
+{                                                                              \
+    vfma##BITS(v1, v2, v3, v4, env, true, float_muladd_negate_c, GETPC());     \
 }
+DEF_GVEC_VFMS_S(32)
+DEF_GVEC_VFMS_S(64)
 
 #define DEF_GVEC_VFSQ(BITS)                                                    \
 void HELPER(gvec_vfsq##BITS)(void *v1, const void *v2, CPUS390XState *env,     \
-- 
2.26.2

next prev parent reply	other threads:[~2020-09-30 15:08 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-09-30 14:55 [PATCH v1 00/20] s390x/tcg: Implement Vector enhancements facility and switch to z14 David Hildenbrand
2020-09-30 14:55 ` [PATCH v1 01/20] softfloat: Implement float128_(min|minnum|minnummag|max|maxnum|maxnummag) David Hildenbrand
2020-09-30 16:10   ` Alex Bennée
2020-10-01 12:40     ` David Hildenbrand
2020-10-01 13:15       ` Alex Bennée
2021-05-05 14:54         ` David Hildenbrand
2021-05-10  9:57           ` Alex Bennée
2021-05-10 10:00             ` David Hildenbrand
2020-09-30 14:55 ` [PATCH v1 02/20] s390x/tcg: Implement VECTOR BIT PERMUTE David Hildenbrand
2020-10-01 15:17   ` Richard Henderson
2020-10-01 17:28     ` David Hildenbrand
2020-09-30 14:55 ` [PATCH v1 03/20] s390x/tcg: Implement VECTOR MULTIPLY SUM LOGICAL David Hildenbrand
2020-10-01 15:26   ` Richard Henderson
2020-10-01 17:30     ` David Hildenbrand
2020-09-30 14:55 ` [PATCH v1 04/20] s390x/tcg: Implement 32/128 bit for VECTOR FP ADD David Hildenbrand
2020-10-01 15:45   ` Richard Henderson
2020-10-01 16:08   ` Richard Henderson
2020-10-01 17:08     ` David Hildenbrand
2020-09-30 14:55 ` [PATCH v1 05/20] s390x/tcg: Implement 32/128 bit for VECTOR FP DIVIDE David Hildenbrand
2020-09-30 14:55 ` [PATCH v1 06/20] s390x/tcg: Implement 32/128 bit for VECTOR FP MULTIPLY David Hildenbrand
2020-09-30 14:55 ` [PATCH v1 07/20] s390x/tcg: Implement 32/128 bit for VECTOR FP SUBTRACT David Hildenbrand
2020-09-30 14:55 ` [PATCH v1 08/20] s390x/tcg: Implement 32/128 bit for VECTOR FP COMPARE (AND SIGNAL) SCALAR David Hildenbrand
2020-10-01 15:52   ` Richard Henderson
2020-09-30 14:55 ` [PATCH v1 09/20] s390x/tcg: Implement 32/128 bit for VECTOR FP COMPARE * David Hildenbrand
2020-10-01 16:12   ` Richard Henderson
2020-09-30 14:55 ` [PATCH v1 10/20] s390x/tcg: Implement 32/128 bit for VECTOR LOAD FP INTEGER David Hildenbrand
2020-09-30 14:55 ` [PATCH v1 11/20] s390x/tcg: Implement 64 bit for VECTOR FP LOAD LENGTHENED David Hildenbrand
2020-10-01 16:19   ` Richard Henderson
2020-09-30 14:55 ` [PATCH v1 12/20] s390x/tcg: Implement 128 bit for VECTOR FP LOAD ROUNDED David Hildenbrand
2020-10-01 16:21   ` Richard Henderson
2020-09-30 14:55 ` [PATCH v1 13/20] s390x/tcg: Implement 32/128 bit for VECTOR FP PERFORM SIGN OPERATION David Hildenbrand
2020-10-01 16:24   ` Richard Henderson
2020-09-30 14:55 ` [PATCH v1 14/20] s390x/tcg: Implement 32/128 bit for VECTOR FP SQUARE ROOT David Hildenbrand
2020-09-30 14:55 ` [PATCH v1 15/20] s390x/tcg: Implement 32/128 bit for VECTOR FP TEST DATA CLASS IMMEDIATE David Hildenbrand
2020-10-01 16:30   ` Richard Henderson
2020-09-30 14:55 ` David Hildenbrand [this message]
2020-09-30 14:55 ` [PATCH v1 17/20] s390x/tcg: Implement VECTOR FP NEGATIVE MULTIPLY AND (ADD|SUBTRACT) David Hildenbrand
2020-09-30 14:55 ` [PATCH v1 18/20] s390x/tcg: Implement VECTOR FP (MAXIMUM|MINIMUM) David Hildenbrand
2020-10-01 16:49   ` Richard Henderson
2020-09-30 14:55 ` [PATCH v1 19/20] s390x/tcg: We support Vector enhancements facility David Hildenbrand
2020-10-01 16:50   ` Richard Henderson
2020-09-30 14:55 ` [PATCH v1 20/20] s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z14 GA2 David Hildenbrand
2020-10-01 16:52   ` Richard Henderson
2020-09-30 15:35 ` [PATCH v1 00/20] s390x/tcg: Implement Vector enhancements facility and switch to z14 no-reply
2020-10-01 15:07 ` Richard Henderson
2020-10-07 13:09   ` David Hildenbrand
2021-05-05 10:55 ` David Hildenbrand

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:c2ded8366 dfblob:e4d60299d dfblob:6bd599b31 dfblob:5d31498cc
dfblob:f18f0ae8e dfblob:0b2571836 )
 OR (
bs:"[PATCH v1 16/20] s390x/tcg: Implement 32/128bit for VECTOR FP MULTIPLY AND (ADD|SUBTRACT)" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200930145523.71087-17-david@redhat.com \
    --to=david@redhat.com \
    --cc=cohuck@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=thuth@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.