qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: thuth@redhat.com, iii@linux.ibm.com
Subject: [PATCH v3 13/13] tcg/s390x: Implement ctpop operation
Date: Thu,  1 Dec 2022 22:52:00 -0800	[thread overview]
Message-ID: <20221202065200.224537-14-richard.henderson@linaro.org> (raw)
In-Reply-To: <20221202065200.224537-1-richard.henderson@linaro.org>

There is an older form that produces per-byte results,
and a newer form that produces per-register results,
and a vector form that produces per-element results.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/s390x/tcg-target.h     |  5 ++--
 tcg/s390x/tcg-target.c.inc | 51 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/tcg/s390x/tcg-target.h b/tcg/s390x/tcg-target.h
index 191c6a073e..5d184d8e14 100644
--- a/tcg/s390x/tcg-target.h
+++ b/tcg/s390x/tcg-target.h
@@ -62,6 +62,7 @@ typedef enum TCGReg {
 #define FACILITY_LOAD_ON_COND         45
 #define FACILITY_FAST_BCR_SER         FACILITY_LOAD_ON_COND
 #define FACILITY_DISTINCT_OPS         FACILITY_LOAD_ON_COND
+#define FACILITY_POPCOUNT             FACILITY_LOAD_ON_COND
 #define FACILITY_LOAD_ON_COND2        53
 #define FACILITY_MISC_INSN_EXT2       58
 #define FACILITY_MISC_INSN_EXT3       61
@@ -91,7 +92,7 @@ extern uint64_t s390_facilities[3];
 #define TCG_TARGET_HAS_nor_i32        HAVE_FACILITY(MISC_INSN_EXT3)
 #define TCG_TARGET_HAS_clz_i32        0
 #define TCG_TARGET_HAS_ctz_i32        0
-#define TCG_TARGET_HAS_ctpop_i32      0
+#define TCG_TARGET_HAS_ctpop_i32      HAVE_FACILITY(POPCOUNT)
 #define TCG_TARGET_HAS_deposit_i32    HAVE_FACILITY(GEN_INST_EXT)
 #define TCG_TARGET_HAS_extract_i32    HAVE_FACILITY(GEN_INST_EXT)
 #define TCG_TARGET_HAS_sextract_i32   0
@@ -128,7 +129,7 @@ extern uint64_t s390_facilities[3];
 #define TCG_TARGET_HAS_nor_i64        HAVE_FACILITY(MISC_INSN_EXT3)
 #define TCG_TARGET_HAS_clz_i64        HAVE_FACILITY(EXT_IMM)
 #define TCG_TARGET_HAS_ctz_i64        0
-#define TCG_TARGET_HAS_ctpop_i64      0
+#define TCG_TARGET_HAS_ctpop_i64      HAVE_FACILITY(POPCOUNT)
 #define TCG_TARGET_HAS_deposit_i64    HAVE_FACILITY(GEN_INST_EXT)
 #define TCG_TARGET_HAS_extract_i64    HAVE_FACILITY(GEN_INST_EXT)
 #define TCG_TARGET_HAS_sextract_i64   0
diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 23cbb10168..7744c6ad54 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -208,6 +208,7 @@ typedef enum S390Opcode {
 
     RRFc_LOCR   = 0xb9f2,
     RRFc_LOCGR  = 0xb9e2,
+    RRFc_POPCNT = 0xb9e1,
 
     RR_AR       = 0x1a,
     RR_ALR      = 0x1e,
@@ -259,6 +260,7 @@ typedef enum S390Opcode {
     RXY_LRVG    = 0xe30f,
     RXY_LRVH    = 0xe31f,
     RXY_LY      = 0xe358,
+    RXY_MSG     = 0xe30c,
     RXY_NG      = 0xe380,
     RXY_OG      = 0xe381,
     RXY_STCY    = 0xe372,
@@ -276,6 +278,7 @@ typedef enum S390Opcode {
     RX_L        = 0x58,
     RX_LA       = 0x41,
     RX_LH       = 0x48,
+    RX_MS       = 0x71,
     RX_ST       = 0x50,
     RX_STC      = 0x42,
     RX_STH      = 0x40,
@@ -1568,6 +1571,45 @@ static void tgen_clz(TCGContext *s, TCGReg dest, TCGReg a1,
     tgen_movcond_int(s, TCG_TYPE_I64, dest, a2, a2const, TCG_REG_R0, 8, 2);
 }
 
+static void tgen_ctpop(TCGContext *s, TCGType type, TCGReg dest, TCGReg src)
+{
+    /* With MIE3, and bit 0 of m4 set, we get the complete result. */
+    if (HAVE_FACILITY(MISC_INSN_EXT3)) {
+        if (type == TCG_TYPE_I32) {
+            tgen_ext32u(s, dest, src);
+            src = dest;
+        }
+        tcg_out_insn(s, RRFc, POPCNT, dest, src, 8);
+        return;
+    }
+
+    /* Without MIE3, each byte gets the count of bits for the byte. */
+    tcg_out_insn(s, RRFc, POPCNT, dest, src, 0);
+
+    /* Multiply to sum each byte at the top of the word. */
+    if (type == TCG_TYPE_I32 && HAVE_FACILITY(GEN_INST_EXT)) {
+        tcg_out_insn(s, RIL, MSFI, dest, 0x01010101);
+    } else {
+        /* No space to save: share the constant between TCG_TYPE_I32/I64. */
+        tcg_out_insn(s, RIL, LARL, TCG_TMP0, 0);
+        new_pool_label(s, 0x0101010101010101ull,
+                       R_390_PC32DBL, s->code_ptr - 2, 2);
+
+        if (type == TCG_TYPE_I32) {
+            tcg_out_insn(s, RX, MS, dest, TCG_TMP0, TCG_REG_NONE, 0);
+        } else {
+            tcg_out_insn(s, RXY, MSG, dest, TCG_TMP0, TCG_REG_NONE, 0);
+        }
+    }
+
+    /* Shift result down from the top byte. */
+    if (type == TCG_TYPE_I32) {
+        tcg_out_sh32(s, RS_SRL, dest, TCG_REG_NONE, 24);
+    } else {
+        tcg_out_sh64(s, RSY_SRLG, dest, dest, TCG_REG_NONE, 56);
+    }
+}
+
 static void tgen_deposit(TCGContext *s, TCGReg dest, TCGReg src,
                          int ofs, int len, int z)
 {
@@ -2733,6 +2775,13 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         tgen_clz(s, args[0], args[1], args[2], const_args[2]);
         break;
 
+    case INDEX_op_ctpop_i32:
+        tgen_ctpop(s, TCG_TYPE_I32, args[0], args[1]);
+        break;
+    case INDEX_op_ctpop_i64:
+        tgen_ctpop(s, TCG_TYPE_I64, args[0], args[1]);
+        break;
+
     case INDEX_op_mb:
         /* The host memory model is quite strong, we simply need to
            serialize the instruction stream.  */
@@ -3302,6 +3351,8 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op)
     case INDEX_op_extu_i32_i64:
     case INDEX_op_extract_i32:
     case INDEX_op_extract_i64:
+    case INDEX_op_ctpop_i32:
+    case INDEX_op_ctpop_i64:
         return C_O1_I1(r, r);
 
     case INDEX_op_qemu_ld_i32:
-- 
2.34.1



  parent reply	other threads:[~2022-12-02  6:54 UTC|newest]

Thread overview: 36+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-12-02  6:51 [PATCH v3 00/13] tcg/s390x: misc patches Richard Henderson
2022-12-02  6:51 ` [PATCH v3 01/13] tcg/s390x: Use register pair allocation for div and mulu2 Richard Henderson
2022-12-06 15:49   ` Ilya Leoshkevich
2022-12-02  6:51 ` [PATCH v3 02/13] tcg/s390x: Remove TCG_REG_TB Richard Henderson
2022-12-06 19:29   ` Ilya Leoshkevich
2022-12-06 22:22     ` Richard Henderson
2022-12-07  0:42       ` Richard Henderson
2022-12-08 14:04         ` Ilya Leoshkevich
2022-12-07  7:45       ` Thomas Huth
2022-12-07 14:55         ` Richard Henderson
2022-12-07 20:40           ` Ilya Leoshkevich
2022-12-07 21:20             ` Christian Borntraeger
2022-12-07 22:09       ` Ilya Leoshkevich
2022-12-02  6:51 ` [PATCH v3 03/13] tcg/s390x: Use LARL+AGHI for odd addresses Richard Henderson
2022-12-06 19:42   ` Ilya Leoshkevich
2022-12-02  6:51 ` [PATCH v3 04/13] tcg/s390x: Distinguish RRF-a and RRF-c formats Richard Henderson
2022-12-06 19:45   ` Ilya Leoshkevich
2022-12-02  6:51 ` [PATCH v3 05/13] tcg/s390x: Distinguish RIE formats Richard Henderson
2022-12-06 19:47   ` Ilya Leoshkevich
2022-12-02  6:51 ` [PATCH v3 06/13] tcg/s390x: Support MIE2 multiply single instructions Richard Henderson
2022-12-06 20:02   ` Ilya Leoshkevich
2022-12-06 20:20     ` Richard Henderson
2022-12-02  6:51 ` [PATCH v3 07/13] tcg/s390x: Support MIE2 MGRK instruction Richard Henderson
2022-12-06 20:02   ` Ilya Leoshkevich
2022-12-02  6:51 ` [PATCH v3 08/13] tcg/s390x: Support MIE3 logical operations Richard Henderson
2022-12-06 20:08   ` Ilya Leoshkevich
2022-12-02  6:51 ` [PATCH v3 09/13] tcg/s390x: Create tgen_cmp2 to simplify movcond Richard Henderson
2022-12-06 20:14   ` Ilya Leoshkevich
2022-12-02  6:51 ` [PATCH v3 10/13] tcg/s390x: Generalize movcond implementation Richard Henderson
2022-12-06 20:39   ` Ilya Leoshkevich
2022-12-02  6:51 ` [PATCH v3 11/13] tcg/s390x: Support SELGR instruction in movcond Richard Henderson
2022-12-06 20:41   ` Ilya Leoshkevich
2022-12-02  6:51 ` [PATCH v3 12/13] tcg/s390x: Use tgen_movcond_int in tgen_clz Richard Henderson
2022-12-06 20:49   ` Ilya Leoshkevich
2022-12-02  6:52 ` Richard Henderson [this message]
2022-12-06 21:10   ` [PATCH v3 13/13] tcg/s390x: Implement ctpop operation Ilya Leoshkevich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221202065200.224537-14-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=iii@linux.ibm.com \
    --cc=qemu-devel@nongnu.org \
    --cc=thuth@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).