[PATCH v4 13/26] s390x/tcg: Implement 32/128 bit for VECTOR FP (ADD|DIVIDE|MULTIPLY|SUBTRACT)

All of lore.kernel.org
 help / color / mirror / Atom feed

From: David Hildenbrand <david@redhat.com>
To: qemu-devel@nongnu.org
Cc: Thomas Huth <thuth@redhat.com>,
	David Hildenbrand <david@redhat.com>,
	Cornelia Huck <cohuck@redhat.com>,
	Richard Henderson <richard.henderson@linaro.org>,
	Laurent Vivier <laurent@vivier.eu>,
	Halil Pasic <pasic@linux.ibm.com>,
	Christian Borntraeger <borntraeger@de.ibm.com>,
	qemu-s390x@nongnu.org
Subject: [PATCH v4 13/26] s390x/tcg: Implement 32/128 bit for VECTOR FP (ADD|DIVIDE|MULTIPLY|SUBTRACT)
Date: Tue,  8 Jun 2021 11:23:24 +0200	[thread overview]
Message-ID: <20210608092337.12221-14-david@redhat.com> (raw)
In-Reply-To: <20210608092337.12221-1-david@redhat.com>

For the 128-bit format, a vector always holds a single element. Add new
helpers for reading/writing 32-bit and 128-bit floats.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  8 ++++
 target/s390x/translate_vx.c.inc | 85 +++++++++++++++++++++++++++++----
 target/s390x/vec_fpu_helper.c   | 74 ++++++++++++++++++++++++++--
 3 files changed, 153 insertions(+), 14 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index e99c9643eb..2d5e382e61 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -247,7 +247,9 @@ DEF_HELPER_6(gvec_vstrc_cc_rt16, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_6(gvec_vstrc_cc_rt32, void, ptr, cptr, cptr, cptr, env, i32)
 
 /* === Vector Floating-Point Instructions */
+DEF_HELPER_FLAGS_5(gvec_vfa32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfa64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfa128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_wfc64, void, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_wfk64, void, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfce64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
@@ -260,15 +262,21 @@ DEF_HELPER_FLAGS_4(gvec_vcdg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcdlg64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vcgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vclgd64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfd32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfd64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfd128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfi64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfll32, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vflr64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfm32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfm64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfm128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfma64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_6(gvec_vfms64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_4(gvec_vfsq64, TCG_CALL_NO_WG, void, ptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfs32, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_FLAGS_5(gvec_vfs64, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_5(gvec_vfs128, TCG_CALL_NO_WG, void, ptr, cptr, cptr, env, i32)
 DEF_HELPER_4(gvec_vftci64, void, ptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target/s390x/translate_vx.c.inc b/target/s390x/translate_vx.c.inc
index 6e75b40eb8..0fbd914b40 100644
--- a/target/s390x/translate_vx.c.inc
+++ b/target/s390x/translate_vx.c.inc
@@ -2501,29 +2501,94 @@ static DisasJumpType op_vfa(DisasContext *s, DisasOps *o)
 {
     const uint8_t fpf = get_field(s, m4);
     const uint8_t m5 = get_field(s, m5);
-    gen_helper_gvec_3_ptr *fn;
-
-    if (fpf != FPF_LONG || extract32(m5, 0, 3)) {
-        gen_program_exception(s, PGM_SPECIFICATION);
-        return DISAS_NORETURN;
-    }
+    gen_helper_gvec_3_ptr *fn = NULL;
 
     switch (s->fields.op2) {
     case 0xe3:
-        fn = gen_helper_gvec_vfa64;
+        switch (fpf) {
+        case FPF_SHORT:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = gen_helper_gvec_vfa32;
+            }
+            break;
+        case FPF_LONG:
+            fn = gen_helper_gvec_vfa64;
+            break;
+        case FPF_EXT:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = gen_helper_gvec_vfa128;
+            }
+            break;
+        default:
+            break;
+        }
         break;
     case 0xe5:
-        fn = gen_helper_gvec_vfd64;
+        switch (fpf) {
+        case FPF_SHORT:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = gen_helper_gvec_vfd32;
+            }
+            break;
+        case FPF_LONG:
+            fn = gen_helper_gvec_vfd64;
+            break;
+        case FPF_EXT:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = gen_helper_gvec_vfd128;
+            }
+            break;
+        default:
+            break;
+        }
         break;
     case 0xe7:
-        fn = gen_helper_gvec_vfm64;
+        switch (fpf) {
+        case FPF_SHORT:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = gen_helper_gvec_vfm32;
+            }
+            break;
+        case FPF_LONG:
+            fn = gen_helper_gvec_vfm64;
+            break;
+        case FPF_EXT:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = gen_helper_gvec_vfm128;
+            }
+            break;
+        default:
+            break;
+        }
         break;
     case 0xe2:
-        fn = gen_helper_gvec_vfs64;
+        switch (fpf) {
+        case FPF_SHORT:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = gen_helper_gvec_vfs32;
+            }
+            break;
+        case FPF_LONG:
+            fn = gen_helper_gvec_vfs64;
+            break;
+        case FPF_EXT:
+            if (s390_has_feat(S390_FEAT_VECTOR_ENH)) {
+                fn = gen_helper_gvec_vfs128;
+            }
+            break;
+        default:
+            break;
+        }
         break;
     default:
         g_assert_not_reached();
     }
+
+    if (!fn || extract32(m5, 0, 3)) {
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
     gen_gvec_3_ptr(get_field(s, v1), get_field(s, v2),
                    get_field(s, v3), cpu_env, m5, fn);
     return DISAS_NEXT;
diff --git a/target/s390x/vec_fpu_helper.c b/target/s390x/vec_fpu_helper.c
index 4af59ea66c..3484c161ba 100644
--- a/target/s390x/vec_fpu_helper.c
+++ b/target/s390x/vec_fpu_helper.c
@@ -78,16 +78,38 @@ static void handle_ieee_exc(CPUS390XState *env, uint8_t vxc, uint8_t vec_exc,
     }
 }
 
+static float32 s390_vec_read_float32(const S390Vector *v, uint8_t enr)
+{
+    return make_float32(s390_vec_read_element32(v, enr));
+}
+
 static float64 s390_vec_read_float64(const S390Vector *v, uint8_t enr)
 {
     return make_float64(s390_vec_read_element64(v, enr));
 }
 
+static float128 s390_vec_read_float128(const S390Vector *v)
+{
+    return make_float128(s390_vec_read_element64(v, 0),
+                         s390_vec_read_element64(v, 1));
+}
+
+static void s390_vec_write_float32(S390Vector *v, uint8_t enr, float32 data)
+{
+    return s390_vec_write_element32(v, enr, data);
+}
+
 static void s390_vec_write_float64(S390Vector *v, uint8_t enr, float64 data)
 {
     return s390_vec_write_element64(v, enr, data);
 }
 
+static void s390_vec_write_float128(S390Vector *v, float128 data)
+{
+    s390_vec_write_element64(v, 0, data.high);
+    s390_vec_write_element64(v, 1, data.low);
+}
+
 typedef float64 (*vop64_2_fn)(float64 a, float_status *s);
 static void vop64_2(S390Vector *v1, const S390Vector *v2, CPUS390XState *env,
                     bool s, bool XxC, uint8_t erm, vop64_2_fn fn,
@@ -160,6 +182,29 @@ DEF_GVEC_VOP2_64(vclgd)
 DEF_GVEC_VOP2(vfi, round_to_int)
 DEF_GVEC_VOP2(vfsq, sqrt)
 
+typedef float32 (*vop32_3_fn)(float32 a, float32 b, float_status *s);
+static void vop32_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+                    CPUS390XState *env, bool s, vop32_3_fn fn,
+                    uintptr_t retaddr)
+{
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        const float32 a = s390_vec_read_float32(v2, i);
+        const float32 b = s390_vec_read_float32(v3, i);
+
+        s390_vec_write_float32(&tmp, i, fn(a, b, &env->fpu_status));
+        vxc = check_ieee_exc(env, i, false, &vec_exc);
+        if (s || vxc) {
+            break;
+        }
+    }
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
 typedef float64 (*vop64_3_fn)(float64 a, float64 b, float_status *s);
 static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
                     CPUS390XState *env, bool s, vop64_3_fn fn,
@@ -183,15 +228,36 @@ static void vop64_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
     *v1 = tmp;
 }
 
-#define DEF_GVEC_VOP3(NAME, OP)                                                \
-void HELPER(gvec_##NAME##64)(void *v1, const void *v2, const void *v3,         \
-                             CPUS390XState *env, uint32_t desc)                \
+typedef float128 (*vop128_3_fn)(float128 a, float128 b, float_status *s);
+static void vop128_3(S390Vector *v1, const S390Vector *v2, const S390Vector *v3,
+                     CPUS390XState *env, bool s, vop128_3_fn fn,
+                     uintptr_t retaddr)
+{
+    const float128 a = s390_vec_read_float128(v2);
+    const float128 b = s390_vec_read_float128(v3);
+    uint8_t vxc, vec_exc = 0;
+    S390Vector tmp = {};
+
+    s390_vec_write_float128(&tmp, fn(a, b, &env->fpu_status));
+    vxc = check_ieee_exc(env, 0, false, &vec_exc);
+    handle_ieee_exc(env, vxc, vec_exc, retaddr);
+    *v1 = tmp;
+}
+
+#define DEF_GVEC_VOP3_B(NAME, OP, BITS)                                        \
+void HELPER(gvec_##NAME##BITS)(void *v1, const void *v2, const void *v3,       \
+                              CPUS390XState *env, uint32_t desc)               \
 {                                                                              \
     const bool se = extract32(simd_data(desc), 3, 1);                          \
                                                                                \
-    vop64_3(v1, v2, v3, env, se, float64_##OP, GETPC());                       \
+    vop##BITS##_3(v1, v2, v3, env, se, float##BITS##_##OP, GETPC());           \
 }
 
+#define DEF_GVEC_VOP3(NAME, OP)                                                \
+DEF_GVEC_VOP3_B(NAME, OP, 32)                                                  \
+DEF_GVEC_VOP3_B(NAME, OP, 64)                                                  \
+DEF_GVEC_VOP3_B(NAME, OP, 128)
+
 DEF_GVEC_VOP3(vfa, add)
 DEF_GVEC_VOP3(vfs, sub)
 DEF_GVEC_VOP3(vfd, div)
-- 
2.31.1

next prev parent reply	other threads:[~2021-06-08  9:34 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-06-08  9:23 [PATCH v4 00/26] s390x/tcg: Implement Vector enhancements facility and switch to z14 David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 01/26] s390x/tcg: Fix FP CONVERT TO (LOGICAL) FIXED NaN handling David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 02/26] s390x/tcg: Fix instruction name for VECTOR FP LOAD (LENGTHENED|ROUNDED) David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 03/26] s390x/tcg: Simplify vop64_3() handling David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 04/26] s390x/tcg: Simplify vop64_2() handling David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 05/26] s390x/tcg: Simplify vfc64() handling David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 06/26] s390x/tcg: Simplify vftci64() handling David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 07/26] s390x/tcg: Simplify vfma64() handling David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 08/26] s390x/tcg: Simplify vfll32() handling David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 09/26] s390x/tcg: Simplify vflr64() handling David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 10/26] s390x/tcg: Simplify wfc64() handling David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 11/26] s390x/tcg: Implement VECTOR BIT PERMUTE David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 12/26] s390x/tcg: Implement VECTOR MULTIPLY SUM LOGICAL David Hildenbrand
2021-06-08  9:23 ` David Hildenbrand [this message]
2021-06-08  9:23 ` [PATCH v4 14/26] s390x/tcg: Implement 32/128 bit for VECTOR (LOAD FP INTEGER|FP SQUARE ROOT) David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 15/26] s390x/tcg: Implement 32/128 bit for VECTOR FP COMPARE * David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 16/26] s390x/tcg: Implement 32/128 bit for VECTOR FP COMPARE (AND SIGNAL) SCALAR David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 17/26] s390x/tcg: Implement 64 bit for VECTOR FP LOAD LENGTHENED David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 18/26] s390x/tcg: Implement 128 bit for VECTOR FP LOAD ROUNDED David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 19/26] s390x/tcg: Implement 32/128 bit for VECTOR FP PERFORM SIGN OPERATION David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 20/26] s390x/tcg: Implement 32/128 bit for VECTOR FP TEST DATA CLASS IMMEDIATE David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 21/26] s390x/tcg: Implement 32/128 bit for VECTOR FP MULTIPLY AND (ADD|SUBTRACT) David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 22/26] s390x/tcg: Implement VECTOR FP NEGATIVE " David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 23/26] s390x/tcg: Implement VECTOR FP (MAXIMUM|MINIMUM) David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 24/26] linux-user: elf: s390x: Prepare for Vector enhancements facility David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 25/26] s390x/tcg: We support " David Hildenbrand
2021-06-08  9:23 ` [PATCH v4 26/26] s390x/cpumodel: Bump up QEMU model to a stripped-down IBM z14 GA2 David Hildenbrand
2021-06-09  9:09 ` [PATCH v4 00/26] s390x/tcg: Implement Vector enhancements facility and switch to z14 Cornelia Huck

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:e99c9643e dfblob:2d5e382e6 dfblob:6e75b40eb dfblob:0fbd914b4
dfblob:4af59ea66 dfblob:3484c161b )
 OR (
bs:"[PATCH v4 13/26] s390x/tcg: Implement 32/128 bit for VECTOR FP (ADD|DIVIDE|MULTIPLY|SUBTRACT)" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210608092337.12221-14-david@redhat.com \
    --to=david@redhat.com \
    --cc=borntraeger@de.ibm.com \
    --cc=cohuck@redhat.com \
    --cc=laurent@vivier.eu \
    --cc=pasic@linux.ibm.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=thuth@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.