qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: David Hildenbrand <david@redhat.com>
To: qemu-devel@nongnu.org
Cc: qemu-s390x@nongnu.org, Thomas Huth <thuth@redhat.com>,
	Cornelia Huck <cohuck@redhat.com>,
	Richard Henderson <rth@twiddle.net>,
	David Hildenbrand <david@redhat.com>
Subject: [Qemu-devel] [PATCH v2 19/32] s390x/tcg: Implement VECTOR PACK *
Date: Fri,  1 Mar 2019 12:54:00 +0100	[thread overview]
Message-ID: <20190301115413.27153-20-david@redhat.com> (raw)
In-Reply-To: <20190301115413.27153-1-david@redhat.com>

This is a big one. Luckily we only have a limited set of such nasty
instructions.

We'll implement all variants with helpers, except when sources and
destinations don't overlap for VECTOR PACK. Provide different helpers
when the cc is to be modified. We'll return the cc then via env->cc_op.

Signed-off-by: David Hildenbrand <david@redhat.com>
---
 target/s390x/helper.h           |  15 +++++
 target/s390x/insn-data.def      |   6 ++
 target/s390x/translate_vx.inc.c |  89 +++++++++++++++++++++++++++
 target/s390x/vec_helper.c       | 105 ++++++++++++++++++++++++++++++++
 4 files changed, 215 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 6c745ba0f6..315495f49f 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -126,6 +126,21 @@ DEF_HELPER_FLAGS_1(stck, TCG_CALL_NO_RWG_SE, i64, env)
 
 /* === Vector Support Instructions === */
 DEF_HELPER_FLAGS_4(vll, TCG_CALL_NO_WG, void, env, ptr, i64, i64)
+DEF_HELPER_FLAGS_4(gvec_vpk16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vpk32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vpk64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vpks16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vpks32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vpks64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vpks_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vpks_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vpks_cc64, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_FLAGS_4(gvec_vpkls16, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vpkls32, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_FLAGS_4(gvec_vpkls64, TCG_CALL_NO_RWG, void, ptr, cptr, cptr, i32)
+DEF_HELPER_5(gvec_vpkls_cc16, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vpkls_cc32, void, ptr, cptr, cptr, env, i32)
+DEF_HELPER_5(gvec_vpkls_cc64, void, ptr, cptr, cptr, env, i32)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index f7232f8615..39cd6f27c1 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -1014,6 +1014,12 @@
     F(0xe761, VMRH,    VRR_c, V,   0, 0, 0, 0, vmr, 0, IF_VEC)
 /* VECTOR MERGE LOW */
     F(0xe760, VMRL,    VRR_c, V,   0, 0, 0, 0, vmr, 0, IF_VEC)
+/* VECTOR PACK */
+    F(0xe794, VPK,     VRR_c, V,   0, 0, 0, 0, vpk, 0, IF_VEC)
+/* VECTOR PACK SATURATE */
+    F(0xe797, VPKS,    VRR_b, V,   0, 0, 0, 0, vpk, 0, IF_VEC)
+/* VECTOR PACK LOGICAL SATURATE */
+    F(0xe795, VPKLS,   VRR_b, V,   0, 0, 0, 0, vpk, 0, IF_VEC)
 
 #ifndef CONFIG_USER_ONLY
 /* COMPARE AND SWAP AND PURGE */
diff --git a/target/s390x/translate_vx.inc.c b/target/s390x/translate_vx.inc.c
index 35a63cd3db..17ea361215 100644
--- a/target/s390x/translate_vx.inc.c
+++ b/target/s390x/translate_vx.inc.c
@@ -135,6 +135,12 @@ static void get_vec_element_ptr_i64(TCGv_ptr ptr, uint8_t reg, TCGv_i64 enr,
     tcg_temp_free_i64(tmp);
 }
 
+#define gen_gvec_3_ool(v1, v2, v3, data, fn) /* both sizes fixed to 16 */ \
+    tcg_gen_gvec_3_ool(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+                       vec_full_reg_offset(v3), 16, 16, data, fn)
+#define gen_gvec_3_ptr(v1, v2, v3, ptr, data, fn) /* same, plus ptr */ \
+    tcg_gen_gvec_3_ptr(vec_full_reg_offset(v1), vec_full_reg_offset(v2), \
+                       vec_full_reg_offset(v3), ptr, 16, 16, data, fn)
 #define gen_gvec_dup_i64(es, v1, c) \
     tcg_gen_gvec_dup_i64(es, vec_full_reg_offset(v1), 16, 16, c)
 #define gen_gvec_mov(v1, v2) \
@@ -568,3 +574,86 @@ static DisasJumpType op_vmr(DisasContext *s, DisasOps *o)
     tcg_temp_free_i64(tmp);
     return DISAS_NEXT;
 }
+
+static DisasJumpType op_vpk(DisasContext *s, DisasOps *o)
+{
+    const uint8_t v1 = get_field(s->fields, v1);
+    const uint8_t v2 = get_field(s->fields, v2);
+    const uint8_t v3 = get_field(s->fields, v3);
+    const uint8_t es = get_field(s->fields, m4); /* source element size */
+    static gen_helper_gvec_3 * vpk[3] = { /* all tables: index is es - 1 */
+        gen_helper_gvec_vpk16,
+        gen_helper_gvec_vpk32,
+        gen_helper_gvec_vpk64,
+    };
+     static gen_helper_gvec_3 * vpks[3] = {
+        gen_helper_gvec_vpks16,
+        gen_helper_gvec_vpks32,
+        gen_helper_gvec_vpks64,
+    };
+    static gen_helper_gvec_3_ptr * vpks_cc[3] = {
+        gen_helper_gvec_vpks_cc16,
+        gen_helper_gvec_vpks_cc32,
+        gen_helper_gvec_vpks_cc64,
+    };
+    static gen_helper_gvec_3 * vpkls[3] = {
+        gen_helper_gvec_vpkls16,
+        gen_helper_gvec_vpkls32,
+        gen_helper_gvec_vpkls64,
+    };
+    static gen_helper_gvec_3_ptr * vpkls_cc[3] = {
+        gen_helper_gvec_vpkls_cc16,
+        gen_helper_gvec_vpkls_cc32,
+        gen_helper_gvec_vpkls_cc64,
+    };
+
+    if (es == ES_8 || es > ES_64) { /* helpers exist for 16/32/64 only */
+        gen_program_exception(s, PGM_SPECIFICATION);
+        return DISAS_NORETURN;
+    }
+
+    switch (s->fields->op2) {
+    case 0x97: /* VECTOR PACK SATURATE */
+        if (get_field(s->fields, m5) & 0x1) { /* cc-setting variant */
+            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpks_cc[es - 1]);
+            set_cc_static(s); /* helper left the cc in env->cc_op */
+        } else {
+            gen_gvec_3_ool(v1, v2, v3, 0, vpks[es - 1]);
+        }
+        break;
+    case 0x95: /* VECTOR PACK LOGICAL SATURATE */
+        if (get_field(s->fields, m5) & 0x1) { /* cc-setting variant */
+            gen_gvec_3_ptr(v1, v2, v3, cpu_env, 0, vpkls_cc[es - 1]);
+            set_cc_static(s); /* helper left the cc in env->cc_op */
+        } else {
+            gen_gvec_3_ool(v1, v2, v3, 0, vpkls[es - 1]);
+        }
+        break;
+    case 0x94: /* VECTOR PACK */
+        /* If sources and destination don't overlap -> fast path */
+        if (v1 != v2 && v1 != v3) {
+            const uint8_t src_es = get_field(s->fields, m4);
+            const uint8_t dst_es = src_es - 1; /* one ES step below source */
+            TCGv_i64 tmp = tcg_temp_new_i64();
+            int dst_idx, src_idx;
+
+            for (dst_idx = 0; dst_idx < NUM_VEC_ELEMENTS(dst_es); dst_idx++) {
+                src_idx = dst_idx;
+                if (src_idx < NUM_VEC_ELEMENTS(src_es)) { /* from v2 */
+                    read_vec_element_i64(tmp, v2, src_idx, src_es);
+                } else { /* remaining elements from v3 */
+                    src_idx -= NUM_VEC_ELEMENTS(src_es);
+                    read_vec_element_i64(tmp, v3, src_idx, src_es);
+                }
+                write_vec_element_i64(tmp, v1, dst_idx, dst_es);
+            }
+            tcg_temp_free_i64(tmp);
+        } else { /* overlap: helper builds the result in a temporary */
+            gen_gvec_3_ool(v1, v2, v3, 0, vpk[es - 1]);
+        }
+        break;
+    default:
+        g_assert_not_reached();
+    }
+    return DISAS_NEXT;
+}
diff --git a/target/s390x/vec_helper.c b/target/s390x/vec_helper.c
index b76c4e3284..6a183c8359 100644
--- a/target/s390x/vec_helper.c
+++ b/target/s390x/vec_helper.c
@@ -15,6 +15,7 @@
 #include "internal.h"
 #include "vec.h"
 #include "tcg/tcg.h"
+#include "tcg/tcg-gvec-desc.h"
 #include "exec/helper-proto.h"
 #include "exec/cpu_ldst.h"
 #include "exec/exec-all.h"
@@ -117,3 +118,107 @@ void HELPER(vll)(CPUS390XState *env, void *v1, uint64_t addr, uint64_t bytes)
         *(S390Vector *)v1 = tmp;
     }
 }
+
+#define DEF_VPK_HFN(BITS, TBITS) /* pack BITS-wide elements to TBITS */        \
+typedef uint##TBITS##_t (*vpk##BITS##_fn)(uint##BITS##_t, int *);              \
+static int vpk##BITS##_hfn(S390Vector *v1, const S390Vector *v2,               \
+                           const S390Vector *v3, vpk##BITS##_fn fn)            \
+{                                                                              \
+    int i, saturated = 0; /* fn may bump this per element */                   \
+    S390Vector tmp; /* staging copy: v1 may alias v2/v3 */                     \
+                                                                               \
+    for (i = 0; i < (128 / TBITS); i++) {                                      \
+        uint##BITS##_t src;                                                    \
+                                                                               \
+        if (i < (128 / BITS)) { /* first half comes from v2 */                 \
+            src = s390_vec_read_element##BITS(v2, i);                          \
+        } else { /* second half comes from v3 */                               \
+            src = s390_vec_read_element##BITS(v3, i - (128 / BITS));           \
+        }                                                                      \
+        s390_vec_write_element##TBITS(&tmp, i, fn(src, &saturated));           \
+    }                                                                          \
+    *v1 = tmp;                                                                 \
+    return saturated;                                                          \
+}
+DEF_VPK_HFN(64, 32)
+DEF_VPK_HFN(32, 16)
+DEF_VPK_HFN(16, 8)
+
+#define DEF_VPK(BITS, TBITS) /* plain pack: truncating, never saturates */     \
+static uint##TBITS##_t vpk##BITS##e(uint##BITS##_t src, int *saturated)        \
+{                                                                              \
+    return src; /* implicit narrowing keeps the low TBITS bits */              \
+}                                                                              \
+void HELPER(gvec_vpk##BITS)(void *v1, const void *v2, const void *v3,          \
+                            uint32_t desc)                                     \
+{                                                                              \
+    vpk##BITS##_hfn(v1, v2, v3, vpk##BITS##e);                                 \
+}
+DEF_VPK(64, 32)
+DEF_VPK(32, 16)
+DEF_VPK(16, 8)
+
+#define DEF_VPKS(BITS, TBITS) /* pack with signed saturation */                \
+static uint##TBITS##_t vpks##BITS##e(uint##BITS##_t src, int *saturated)       \
+{                                                                              \
+    if ((int##BITS##_t)src > INT##TBITS##_MAX) {                               \
+        (*saturated)++;                                                        \
+        return INT##TBITS##_MAX;                                               \
+    } else if ((int##BITS##_t)src < INT##TBITS##_MIN) {                        \
+        (*saturated)++;                                                        \
+        return INT##TBITS##_MIN;                                               \
+    }                                                                          \
+    return src;                                                                \
+}                                                                              \
+void HELPER(gvec_vpks##BITS)(void *v1, const void *v2, const void *v3,         \
+                             uint32_t desc)                                    \
+{                                                                              \
+    vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e);                                \
+}                                                                              \
+void HELPER(gvec_vpks_cc##BITS)(void *v1, const void *v2, const void *v3,      \
+                                CPUS390XState *env, uint32_t desc)             \
+{                                                                              \
+    int saturated = vpk##BITS##_hfn(v1, v2, v3, vpks##BITS##e);                \
+                                                                               \
+    if (saturated == (128 / TBITS)) { /* every element saturated */            \
+        env->cc_op = 3;                                                        \
+    } else if (saturated) { /* at least one element saturated */               \
+        env->cc_op = 1;                                                        \
+    } else { /* nothing saturated */                                           \
+        env->cc_op = 0;                                                        \
+    }                                                                          \
+}
+DEF_VPKS(64, 32)
+DEF_VPKS(32, 16)
+DEF_VPKS(16, 8)
+
+#define DEF_VPKLS(BITS, TBITS) /* pack with unsigned saturation */             \
+static uint##TBITS##_t vpkls##BITS##e(uint##BITS##_t src, int *saturated)      \
+{                                                                              \
+    if (src > UINT##TBITS##_MAX) {                                             \
+        (*saturated)++;                                                        \
+        return UINT##TBITS##_MAX;                                              \
+    }                                                                          \
+    return src;                                                                \
+}                                                                              \
+void HELPER(gvec_vpkls##BITS)(void *v1, const void *v2, const void *v3,        \
+                              uint32_t desc)                                   \
+{                                                                              \
+    vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e);                               \
+}                                                                              \
+void HELPER(gvec_vpkls_cc##BITS)(void *v1, const void *v2, const void *v3,     \
+                                 CPUS390XState *env, uint32_t desc)            \
+{                                                                              \
+    int saturated = vpk##BITS##_hfn(v1, v2, v3, vpkls##BITS##e);               \
+                                                                               \
+    if (saturated == (128 / TBITS)) { /* every element saturated */            \
+        env->cc_op = 3;                                                        \
+    } else if (saturated) { /* at least one element saturated */               \
+        env->cc_op = 1;                                                        \
+    } else { /* nothing saturated */                                           \
+        env->cc_op = 0;                                                        \
+    }                                                                          \
+}
+DEF_VPKLS(64, 32)
+DEF_VPKLS(32, 16)
+DEF_VPKLS(16, 8)
-- 
2.17.2

  parent reply	other threads:[~2019-03-01 11:55 UTC|newest]

Thread overview: 61+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-01 11:53 [Qemu-devel] [PATCH v2 00/32] s390x/tcg: Vector Instruction Support Part 1 David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 01/32] s390x/tcg: Define vector instruction formats David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 02/32] s390x/tcg: Check vector register instructions at central point David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 03/32] s390x/tcg: Utilities for vector instruction helpers David Hildenbrand
2019-03-01 16:09   ` Richard Henderson
2019-03-01 16:13     ` David Hildenbrand
2019-03-01 16:16       ` Richard Henderson
2019-03-01 16:18         ` David Hildenbrand
2019-03-01 16:16       ` David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 04/32] s390x/tcg: Implement VECTOR GATHER ELEMENT David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 05/32] s390x/tcg: Implement VECTOR GENERATE BYTE MASK David Hildenbrand
2019-03-01 16:11   ` Richard Henderson
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 06/32] s390x/tcg: Implement VECTOR GENERATE MASK David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 07/32] s390x/tcg: Implement VECTOR LOAD David Hildenbrand
2019-03-01 16:17   ` Richard Henderson
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 08/32] s390x/tcg: Implement VECTOR LOAD AND REPLICATE David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 09/32] s390x/tcg: Implement VECTOR LOAD ELEMENT David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 10/32] s390x/tcg: Implement VECTOR LOAD ELEMENT IMMEDIATE David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 11/32] s390x/tcg: Implement VECTOR LOAD GR FROM VR ELEMENT David Hildenbrand
2019-03-01 16:21   ` Richard Henderson
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 12/32] s390x/tcg: Implement VECTOR LOAD LOGICAL ELEMENT AND ZERO David Hildenbrand
2019-03-01 16:21   ` Richard Henderson
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 13/32] s390x/tcg: Implement VECTOR LOAD MULTIPLE David Hildenbrand
2019-03-01 16:26   ` Richard Henderson
2019-03-01 16:33     ` David Hildenbrand
2019-03-01 17:51   ` David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 14/32] s390x/tcg: Implement VECTOR LOAD TO BLOCK BOUNDARY David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 15/32] s390x/tcg: Implement VECTOR LOAD VR ELEMENT FROM GR David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 16/32] s390x/tcg: Implement VECTOR LOAD VR FROM GRS DISJOINT David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 17/32] s390x/tcg: Implement VECTOR LOAD WITH LENGTH David Hildenbrand
2019-03-01 11:53 ` [Qemu-devel] [PATCH v2 18/32] s390x/tcg: Implement VECTOR MERGE (HIGH|LOW) David Hildenbrand
2019-03-01 16:28   ` Richard Henderson
2019-03-01 11:54 ` David Hildenbrand [this message]
2019-03-01 17:28   ` [Qemu-devel] [PATCH v2 19/32] s390x/tcg: Implement VECTOR PACK * Richard Henderson
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 20/32] s390x/tcg: Implement VECTOR PERMUTE David Hildenbrand
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 21/32] s390x/tcg: Implement VECTOR PERMUTE DOUBLEWORD IMMEDIATE David Hildenbrand
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 22/32] s390x/tcg: Implement VECTOR REPLICATE David Hildenbrand
2019-03-01 17:35   ` Richard Henderson
2019-03-01 17:40     ` David Hildenbrand
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 23/32] s390x/tcg: Implement VECTOR REPLICATE IMMEDIATE David Hildenbrand
2019-03-01 17:35   ` Richard Henderson
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 24/32] s390x/tcg: Implement VECTOR SCATTER ELEMENT David Hildenbrand
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 25/32] s390x/tcg: Implement VECTOR SELECT David Hildenbrand
2019-03-01 17:37   ` Richard Henderson
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 26/32] s390x/tcg: Implement VECTOR SIGN EXTEND TO DOUBLEWORD David Hildenbrand
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 27/32] s390x/tcg: Provide probe_write helper David Hildenbrand
2019-03-01 17:54   ` Richard Henderson
2019-03-01 18:15     ` David Hildenbrand
2019-03-04  8:59     ` David Hildenbrand
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 28/32] s390x/tcg: Implement VECTOR STORE David Hildenbrand
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 29/32] s390x/tcg: Implement VECTOR STORE ELEMENT David Hildenbrand
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 30/32] s390x/tcg: Implement VECTOR STORE MULTIPLE David Hildenbrand
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 31/32] s390x/tcg: Implement VECTOR STORE WITH LENGTH David Hildenbrand
2019-03-01 11:54 ` [Qemu-devel] [PATCH v2 32/32] s390x/tcg: Implement VECTOR UNPACK * David Hildenbrand
2019-03-01 12:26 ` [Qemu-devel] [PATCH v2 00/32] s390x/tcg: Vector Instruction Support Part 1 no-reply
2019-03-01 16:17 ` no-reply
2019-03-01 16:26 ` no-reply
2019-03-01 16:31 ` no-reply
2019-03-01 16:38 ` no-reply
2019-03-01 16:40   ` David Hildenbrand
2019-03-01 21:24 ` no-reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190301115413.27153-20-david@redhat.com \
    --to=david@redhat.com \
    --cc=cohuck@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-s390x@nongnu.org \
    --cc=rth@twiddle.net \
    --cc=thuth@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).