qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: agraf@suse.de, aurelien@aurel32.net
Subject: [Qemu-devel] [PATCH 22/35] tcg-s390: Use the AND IMMEDIATE instructions.
Date: Fri,  4 Jun 2010 12:14:30 -0700	[thread overview]
Message-ID: <1275678883-7082-23-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1275678883-7082-1-git-send-email-rth@twiddle.net>

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/s390/tcg-target.c |  179 +++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 166 insertions(+), 13 deletions(-)

diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 795ddcd..53a92c5 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -36,6 +36,7 @@
 #define TCG_CT_CONST_32    0x100
 #define TCG_CT_CONST_NEG   0x200
 #define TCG_CT_CONST_ADDI  0x400
+#define TCG_CT_CONST_ANDI  0x800
 
 /* Several places within the instruction set 0 means "no register"
    rather than TCG_REG_R0.  */
@@ -61,6 +62,8 @@ typedef enum S390Opcode {
     RIL_LGFI    = 0xc001,
     RIL_LLIHF   = 0xc00e,
     RIL_LLILF   = 0xc00f,
+    RIL_NIHF    = 0xc00a,
+    RIL_NILF    = 0xc00b,
 
     RI_AGHI     = 0xa70b,
     RI_AHI      = 0xa70a,
@@ -74,6 +77,10 @@ typedef enum S390Opcode {
     RI_LLIHL    = 0xa50d,
     RI_LLILH    = 0xa50e,
     RI_LLILL    = 0xa50f,
+    RI_NIHH     = 0xa504,
+    RI_NIHL     = 0xa505,
+    RI_NILH     = 0xa506,
+    RI_NILL     = 0xa507,
 
     RRE_AGR     = 0xb908,
     RRE_CGR     = 0xb920,
@@ -319,6 +326,10 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
         ct->ct &= ~TCG_CT_REG;
         ct->ct |= TCG_CT_CONST_ADDI;
         break;
+    case 'A':
+        ct->ct &= ~TCG_CT_REG;
+        ct->ct |= TCG_CT_CONST_ANDI;
+        break;
     default:
         break;
     }
@@ -328,9 +339,66 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
     return 0;
 }
 
+/* Immediates to be used with logical AND.  This is an optimization only,
+   since a full 64-bit immediate AND can always be performed with 4 sequential
+   NI[LH][LH] instructions.  Returns 1 to accept VAL as an AND immediate, and
+   0 for immediates that we can load efficiently, where the immediate load
+   plus the reg-reg AND is smaller than the sequential NI's.  */
+
+static int tcg_match_andi(int ct, tcg_target_ulong val)
+{
+    int i;
+
+    if (facilities & FACILITY_EXT_IMM) {
+        if (ct & TCG_CT_CONST_32) {
+            /* All 32-bit ANDs can be performed with 1 48-bit insn.  */
+            return 1;
+        }
+
+        /* Zero-extensions.  */
+        if (val == 0xff || val == 0xffff || val == 0xffffffff) {
+            return 1;
+        }
+    } else {
+        if (ct & TCG_CT_CONST_32) {
+            val = (uint32_t)val;  /* keep only the low 32 bits */
+        } else if (val == 0xffffffff) {  /* 32-bit zero-extension mask */
+            return 1;
+        }
+    }
+
+    /* One 32-bit insn suffices if VAL is all ones outside one 16-bit field.  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = ~(0xffffull << i*16);
+        if ((val & mask) == mask) {
+            return 1;
+        }
+    }
+
+    /* Look for 16-bit values performing the mask.  These are better
+       to load with LLI[LH][LH].  */
+    for (i = 0; i < 4; i++) {
+        tcg_target_ulong mask = 0xffffull << i*16;
+        if ((val & mask) == val) {  /* no bits set outside field i */
+            return 0;
+        }
+    }
+
+    /* Look for 32-bit values performing the 64-bit mask.  These
+       are better to load with LLI[LH]F, or if extended immediates
+       are not available, with a pair of LLI insns.  */
+    if ((ct & TCG_CT_CONST_32) == 0) {
+        if (val <= 0xffffffff || (val & 0xffffffff) == 0) {
+            return 0;
+        }
+    }
+
+    return 1;  /* nothing cheaper was found; accept the immediate */
+}
+
 /* Test if a constant matches the constraint. */
-static inline int tcg_target_const_match(tcg_target_long val,
-                                         const TCGArgConstraint *arg_ct)
+static int tcg_target_const_match(tcg_target_long val,
+                                  const TCGArgConstraint *arg_ct)
 {
     int ct = arg_ct->ct;
 
@@ -357,6 +425,8 @@ static inline int tcg_target_const_match(tcg_target_long val,
         } else {
             return val == (int16_t)val;
         }
+    } else if (ct & TCG_CT_CONST_ANDI) {
+        return tcg_match_andi(ct, val);
     }
 
     return 0;
@@ -703,6 +773,74 @@ static void tgen64_addi(TCGContext *s, TCGReg dest, int64_t val)
 
 }
 
+static void tgen64_andi(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{   /* Emit code that ANDs the 64-bit immediate VAL into DEST.  */
+    static const S390Opcode ni_insns[4] = {  /* by 16-bit field, low first */
+        RI_NILL, RI_NILH, RI_NIHL, RI_NIHH
+    };
+    static const S390Opcode nif_insns[2] = {  /* by 32-bit half, low first */
+        RIL_NILF, RIL_NIHF
+    };
+
+    int i;
+
+    /* Look for no-op: an all-ones mask needs no code at all.  */
+    if (val == -1) {
+        return;
+    }
+
+    /* Look for the zero-extensions.  */
+    if (val == 0xffffffff) {
+        tgen_ext32u(s, dest, dest);
+        return;
+    }
+
+    if (facilities & FACILITY_EXT_IMM) {
+        if (val == 0xff) {
+            tgen_ext8u(s, TCG_TYPE_I64, dest, dest);
+            return;
+        }
+        if (val == 0xffff) {
+            tgen_ext16u(s, TCG_TYPE_I64, dest, dest);
+            return;
+        }
+
+        /* Try all 32-bit insns that can perform it in one go.  */
+        for (i = 0; i < 4; i++) {
+            tcg_target_ulong mask = ~(0xffffull << i*16);
+            if ((val & mask) == mask) {  /* all ones outside field i */
+                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+                return;
+            }
+        }
+
+        /* Try all 48-bit insns that can perform it in one go.  */
+        if (facilities & FACILITY_EXT_IMM) {  /* repeats the enclosing test */
+            for (i = 0; i < 2; i++) {
+                tcg_target_ulong mask = ~(0xffffffffull << i*32);
+                if ((val & mask) == mask) {  /* all ones outside half i */
+                    tcg_out_insn_RIL(s, nif_insns[i], dest, val >> i*32);
+                    return;
+                }
+            }
+        }
+
+        /* Perform the AND via sequential modifications to the high and low
+           parts.  Do this via recursion to handle 16-bit vs 32-bit masks in
+           each half.  */
+        tgen64_andi(s, dest, val | 0xffffffff00000000ull);  /* low half */
+        tgen64_andi(s, dest, val | 0x00000000ffffffffull);  /* high half */
+    } else {
+        /* With no extended-immediate facility, just emit the sequence.  */
+        for (i = 0; i < 4; i++) {
+            tcg_target_ulong mask = 0xffffull << i*16;
+            if ((val & mask) != mask) {  /* skip fields that are all ones */
+                tcg_out_insn_RI(s, ni_insns[i], dest, val >> i*16);
+            }
+        }
+    }
+}
+
 static void tgen32_cmp(TCGContext *s, TCGCond c, TCGReg r1, TCGReg r2)
 {
     if (c > TCG_COND_GT) {
@@ -776,6 +914,16 @@ static void tgen_calli(TCGContext *s, tcg_target_long dest)
 }
 
 #if defined(CONFIG_SOFTMMU)
+static void tgen64_andi_tmp(TCGContext *s, TCGReg dest, tcg_target_ulong val)
+{   /* AND VAL into DEST; TCG_TMP0 is overwritten when VAL is loaded.  */
+    if (tcg_match_andi(0, val)) {
+        tgen64_andi(s, dest, val);  /* VAL suits an immediate AND */
+    } else {
+        tcg_out_movi(s, TCG_TYPE_I64, TCG_TMP0, val);  /* cheaper to load */
+        tcg_out_insn(s, RRE, NGR, dest, TCG_TMP0);
+    }
+}
+
 static void tcg_prepare_qemu_ldst(TCGContext* s, int data_reg, int addr_reg,
                                   int mem_index, int opc,
                                   uint16_t **label2_ptr_p, int is_store)
@@ -803,13 +951,8 @@ static void tcg_prepare_qemu_ldst(TCGContext* s, int data_reg, int addr_reg,
     tcg_out_sh64(s, RSY_SRLG, arg1, addr_reg, TCG_REG_NONE,
                  TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
 
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
-                 TARGET_PAGE_MASK | ((1 << s_bits) - 1));
-    tcg_out_insn(s, RRE, NGR, arg0, TCG_TMP0);
-
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
-                 (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-    tcg_out_insn(s, RRE, NGR, arg1, TCG_TMP0);
+    tgen64_andi_tmp(s, arg0, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
+    tgen64_andi_tmp(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
 
     if (is_store) {
         tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0,
@@ -1178,7 +1321,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_and_i32:
-        tcg_out_insn(s, RR, NR, args[0], args[2]);
+        if (const_args[2]) {
+            tgen64_andi(s, args[0], args[2] | 0xffffffff00000000ull);
+        } else {
+            tcg_out_insn(s, RR, NR, args[0], args[2]);
+        }
         break;
     case INDEX_op_or_i32:
         tcg_out_insn(s, RR, OR, args[0], args[2]);
@@ -1188,7 +1335,11 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_and_i64:
-        tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+        if (const_args[2]) {
+            tgen64_andi(s, args[0], args[2]);
+        } else {
+            tcg_out_insn(s, RRE, NGR, args[0], args[2]);
+        }
         break;
     case INDEX_op_or_i64:
         tcg_out_insn(s, RRE, OGR, args[0], args[2]);
@@ -1454,9 +1605,10 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_div2_i32, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i32, { "b", "a", "0", "1", "r" } },
 
-    { INDEX_op_and_i32, { "r", "0", "r" } },
+    { INDEX_op_and_i32, { "r", "0", "rWA" } },
     { INDEX_op_or_i32, { "r", "0", "r" } },
     { INDEX_op_xor_i32, { "r", "0", "r" } },
+
     { INDEX_op_neg_i32, { "r", "r" } },
 
     { INDEX_op_shl_i32, { "r", "0", "Ri" } },
@@ -1515,9 +1667,10 @@ static const TCGTargetOpDef s390_op_defs[] = {
     { INDEX_op_div2_i64, { "b", "a", "0", "1", "r" } },
     { INDEX_op_divu2_i64, { "b", "a", "0", "1", "r" } },
 
-    { INDEX_op_and_i64, { "r", "0", "r" } },
+    { INDEX_op_and_i64, { "r", "0", "rA" } },
     { INDEX_op_or_i64, { "r", "0", "r" } },
     { INDEX_op_xor_i64, { "r", "0", "r" } },
+
     { INDEX_op_neg_i64, { "r", "r" } },
 
     { INDEX_op_shl_i64, { "r", "r", "Ri" } },
-- 
1.7.0.1

  parent reply	other threads:[~2010-06-04 19:16 UTC|newest]

Thread overview: 75+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-06-04 19:14 [Qemu-devel] [PATCH 00/35] S390 TCG target, version 2 Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 01/35] tcg-s390: Adjust compilation flags Richard Henderson
2010-06-09 22:53   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 02/35] s390x: Avoid _llseek Richard Henderson
2010-06-09 22:54   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 03/35] s390x: Don't use a linker script for user-only Richard Henderson
2010-06-09 22:54   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 04/35] tcg-s390: Compute is_write in cpu_signal_handler Richard Henderson
2010-06-09 22:54   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 05/35] tcg-s390: Icache flush is a no-op Richard Henderson
2010-06-09 22:55   ` Aurelien Jarno
2010-06-10 22:04     ` Richard Henderson
2010-06-11  6:46       ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 06/35] tcg-s390: Allocate the code_gen_buffer near the main program Richard Henderson
2010-06-09 22:59   ` Aurelien Jarno
2010-06-10 22:05     ` Richard Henderson
2010-06-11  7:31       ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 07/35] tcg: Optionally sign-extend 32-bit arguments for 64-bit host Richard Henderson
2010-06-10 10:22   ` Aurelien Jarno
2010-06-10 22:08     ` Richard Henderson
2010-06-14 22:20     ` Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 08/35] s390: Update disassembler to the last GPLv2 from binutils Richard Henderson
2010-06-09 22:47   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 09/35] s390: Disassemble some general-instruction-extension insns Richard Henderson
2010-06-09 22:47   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 10/35] tcg-s390: New TCG target Richard Henderson
2010-06-10 10:24   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 11/35] tcg-s390: Tidy unimplemented opcodes Richard Henderson
2010-06-10 10:24   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 12/35] tcg-s390: Define TCG_TMP0 Richard Henderson
2010-06-10 10:25   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 13/35] tcg-s390: Tidy regset initialization; use R14 as temporary Richard Henderson
2010-06-10 10:26   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 14/35] tcg-s390: Rearrange register allocation order Richard Henderson
2010-06-10 10:26   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 15/35] tcg-s390: Query instruction extensions that are installed Richard Henderson
2010-06-10 10:28   ` Aurelien Jarno
2010-06-10 22:19     ` Richard Henderson
2010-06-11  8:06       ` Aurelien Jarno
2010-06-11 13:07         ` Richard Henderson
2010-06-12 11:57           ` Aurelien Jarno
2010-06-11 13:13         ` Richard Henderson
2010-06-13 10:49           ` Aurelien Jarno
2010-06-13 16:02             ` Richard Henderson
2010-06-13 16:44               ` Aurelien Jarno
2010-06-13 22:23                 ` Alexander Graf
2010-06-14 16:20                   ` Richard Henderson
2010-06-14 17:39                     ` Alexander Graf
2010-06-04 19:14 ` [Qemu-devel] [PATCH 16/35] tcg-s390: Re-implement tcg_out_movi Richard Henderson
2010-06-12 12:04   ` Aurelien Jarno
2010-06-13 23:19     ` Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 17/35] tcg-s390: Implement sign and zero-extension operations Richard Henderson
2010-06-12 12:32   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 18/35] tcg-s390: Implement bswap operations Richard Henderson
2010-06-12 12:32   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 19/35] tcg-s390: Implement rotates Richard Henderson
2010-06-12 12:33   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 20/35] tcg-s390: Use LOAD COMPLIMENT for negate Richard Henderson
2010-06-12 12:33   ` Aurelien Jarno
2010-06-04 19:14 ` [Qemu-devel] [PATCH 21/35] tcg-s390: Use the ADD IMMEDIATE instructions Richard Henderson
2010-06-04 19:14 ` Richard Henderson [this message]
2010-06-04 19:14 ` [Qemu-devel] [PATCH 23/35] tcg-s390: Use the OR " Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 24/35] tcg-s390: Use the XOR " Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 25/35] tcg-s390: Use the MULTIPLY " Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 26/35] tcg-s390: Tidy goto_tb Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 27/35] tcg-s390: Rearrange qemu_ld/st to avoid register copy Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 28/35] tcg-s390: Tidy tcg_prepare_qemu_ldst Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 29/35] tcg-s390: Tidy user qemu_ld/st Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 30/35] tcg-s390: Implement GUEST_BASE Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 31/35] tcg-s390: Use 16-bit branches for forward jumps Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 32/35] tcg-s390: Use the LOAD AND TEST instruction for compares Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 33/35] tcg-s390: Use the COMPARE IMMEDIATE instrucions " Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 34/35] tcg-s390: Use COMPARE AND BRANCH instructions Richard Henderson
2010-06-04 19:14 ` [Qemu-devel] [PATCH 35/35] tcg-s390: Enable compile in 32-bit mode Richard Henderson
2010-06-08 13:11 ` [Qemu-devel] Re: [PATCH 00/35] S390 TCG target, version 2 Alexander Graf

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1275678883-7082-23-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=agraf@suse.de \
    --cc=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).