qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org
Subject: [Qemu-devel] [PULL 03/65] tcg: Add deposit_z expander
Date: Tue, 10 Jan 2017 18:17:18 -0800	[thread overview]
Message-ID: <20170111021820.24416-4-rth@twiddle.net> (raw)
In-Reply-To: <20170111021820.24416-1-rth@twiddle.net>

While we don't require a new opcode, it is handy to have an expander
that knows the first source is zero.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/tcg-op.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 tcg/tcg-op.h |   6 +++
 2 files changed, 149 insertions(+)

diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index b17f03f..1927e53 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -561,6 +561,64 @@ void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
     tcg_temp_free_i32(t1);
 }
 
+void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
+                           unsigned int ofs, unsigned int len)
+{   /* RET = zero with the low LEN bits of ARG placed at bit OFS.  */
+    tcg_debug_assert(ofs < 32);
+    tcg_debug_assert(len > 0);
+    tcg_debug_assert(len <= 32);
+    tcg_debug_assert(ofs + len <= 32);  /* field must fit in 32 bits */
+
+    if (ofs + len == 32) {  /* field ends at bit 31: a shift suffices */
+        tcg_gen_shli_i32(ret, arg, ofs);
+    } else if (ofs == 0) {  /* field starts at bit 0: a mask suffices */
+        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);
+    } else if (TCG_TARGET_HAS_deposit_i32
+               && TCG_TARGET_deposit_i32_valid(ofs, len)) {
+        TCGv_i32 zero = tcg_const_i32(0);  /* the known-zero first source */
+        tcg_gen_op5ii_i32(INDEX_op_deposit_i32, ret, zero, arg, ofs, len);
+        tcg_temp_free_i32(zero);
+    } else {
+        /* To help two-operand hosts we prefer to zero-extend first,
+           which allows ARG to stay live.  */
+        switch (len) {  /* is the field exactly a zero-extend width? */
+        case 16:
+            if (TCG_TARGET_HAS_ext16u_i32) {
+                tcg_gen_ext16u_i32(ret, arg);
+                tcg_gen_shli_i32(ret, ret, ofs);
+                return;
+            }
+            break;  /* no ext16u available: try the next form */
+        case 8:
+            if (TCG_TARGET_HAS_ext8u_i32) {
+                tcg_gen_ext8u_i32(ret, arg);
+                tcg_gen_shli_i32(ret, ret, ofs);
+                return;
+            }
+            break;  /* no ext8u available: try the next form */
+        }
+        /* Otherwise prefer zero-extension over AND for code size.  */
+        switch (ofs + len) {  /* does the shifted field end at bit 8 or 16? */
+        case 16:
+            if (TCG_TARGET_HAS_ext16u_i32) {
+                tcg_gen_shli_i32(ret, arg, ofs);
+                tcg_gen_ext16u_i32(ret, ret);
+                return;
+            }
+            break;  /* no ext16u available: use the generic form */
+        case 8:
+            if (TCG_TARGET_HAS_ext8u_i32) {
+                tcg_gen_shli_i32(ret, arg, ofs);
+                tcg_gen_ext8u_i32(ret, ret);
+                return;
+            }
+            break;  /* no ext8u available: use the generic form */
+        }
+        tcg_gen_andi_i32(ret, arg, (1u << len) - 1);  /* generic: mask, */
+        tcg_gen_shli_i32(ret, ret, ofs);  /* then shift into place */
+    }
+}
+
 void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
                          unsigned int ofs, unsigned int len)
 {
@@ -1762,6 +1820,91 @@ void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
     tcg_temp_free_i64(t1);
 }
 
+void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
+                           unsigned int ofs, unsigned int len)
+{   /* RET = zero with the low LEN bits of ARG placed at bit OFS.  */
+    tcg_debug_assert(ofs < 64);
+    tcg_debug_assert(len > 0);
+    tcg_debug_assert(len <= 64);
+    tcg_debug_assert(ofs + len <= 64);  /* field must fit in 64 bits */
+
+    if (ofs + len == 64) {  /* field ends at bit 63: a shift suffices */
+        tcg_gen_shli_i64(ret, arg, ofs);
+    } else if (ofs == 0) {  /* field starts at bit 0: a mask suffices */
+        tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);
+    } else if (TCG_TARGET_HAS_deposit_i64
+               && TCG_TARGET_deposit_i64_valid(ofs, len)) {
+        TCGv_i64 zero = tcg_const_i64(0);  /* the known-zero first source */
+        tcg_gen_op5ii_i64(INDEX_op_deposit_i64, ret, zero, arg, ofs, len);
+        tcg_temp_free_i64(zero);
+    } else {
+        if (TCG_TARGET_REG_BITS == 32) {  /* work on the 32-bit halves */
+            if (ofs >= 32) {  /* field lies entirely in the high half */
+                tcg_gen_deposit_z_i32(TCGV_HIGH(ret), TCGV_LOW(arg),
+                                      ofs - 32, len);
+                tcg_gen_movi_i32(TCGV_LOW(ret), 0);  /* low half is zero */
+                return;
+            }
+            if (ofs + len <= 32) {  /* field lies entirely in the low half */
+                tcg_gen_deposit_z_i32(TCGV_LOW(ret), TCGV_LOW(arg), ofs, len);
+                tcg_gen_movi_i32(TCGV_HIGH(ret), 0);  /* high half is zero */
+                return;
+            }
+        }  /* a field straddling both halves uses the code below */
+        /* To help two-operand hosts we prefer to zero-extend first,
+           which allows ARG to stay live.  */
+        switch (len) {  /* is the field exactly a zero-extend width? */
+        case 32:
+            if (TCG_TARGET_HAS_ext32u_i64) {
+                tcg_gen_ext32u_i64(ret, arg);
+                tcg_gen_shli_i64(ret, ret, ofs);
+                return;
+            }
+            break;  /* no ext32u available: try the next form */
+        case 16:
+            if (TCG_TARGET_HAS_ext16u_i64) {
+                tcg_gen_ext16u_i64(ret, arg);
+                tcg_gen_shli_i64(ret, ret, ofs);
+                return;
+            }
+            break;  /* no ext16u available: try the next form */
+        case 8:
+            if (TCG_TARGET_HAS_ext8u_i64) {
+                tcg_gen_ext8u_i64(ret, arg);
+                tcg_gen_shli_i64(ret, ret, ofs);
+                return;
+            }
+            break;  /* no ext8u available: try the next form */
+        }
+        /* Otherwise prefer zero-extension over AND for code size.  */
+        switch (ofs + len) {  /* does the shifted field end at 8, 16 or 32? */
+        case 32:
+            if (TCG_TARGET_HAS_ext32u_i64) {
+                tcg_gen_shli_i64(ret, arg, ofs);
+                tcg_gen_ext32u_i64(ret, ret);
+                return;
+            }
+            break;  /* no ext32u available: use the generic form */
+        case 16:
+            if (TCG_TARGET_HAS_ext16u_i64) {
+                tcg_gen_shli_i64(ret, arg, ofs);
+                tcg_gen_ext16u_i64(ret, ret);
+                return;
+            }
+            break;  /* no ext16u available: use the generic form */
+        case 8:
+            if (TCG_TARGET_HAS_ext8u_i64) {
+                tcg_gen_shli_i64(ret, arg, ofs);
+                tcg_gen_ext8u_i64(ret, ret);
+                return;
+            }
+            break;  /* no ext8u available: use the generic form */
+        }
+        tcg_gen_andi_i64(ret, arg, (1ull << len) - 1);  /* generic: mask, */
+        tcg_gen_shli_i64(ret, ret, ofs);  /* then shift into place */
+    }
+}
+
 void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
                          unsigned int ofs, unsigned int len)
 {
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index b515e6f..d42fd0d 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -292,6 +292,8 @@ void tcg_gen_rotr_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2);
 void tcg_gen_rotri_i32(TCGv_i32 ret, TCGv_i32 arg1, unsigned arg2);
 void tcg_gen_deposit_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2,
                          unsigned int ofs, unsigned int len);
+void tcg_gen_deposit_z_i32(TCGv_i32 ret, TCGv_i32 arg,
+                           unsigned int ofs, unsigned int len);
 void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
                          unsigned int ofs, unsigned int len);
 void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
@@ -473,6 +475,8 @@ void tcg_gen_rotr_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2);
 void tcg_gen_rotri_i64(TCGv_i64 ret, TCGv_i64 arg1, unsigned arg2);
 void tcg_gen_deposit_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2,
                          unsigned int ofs, unsigned int len);
+void tcg_gen_deposit_z_i64(TCGv_i64 ret, TCGv_i64 arg,
+                           unsigned int ofs, unsigned int len);
 void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
                          unsigned int ofs, unsigned int len);
 void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
@@ -959,6 +963,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 #define tcg_gen_rotr_tl tcg_gen_rotr_i64
 #define tcg_gen_rotri_tl tcg_gen_rotri_i64
 #define tcg_gen_deposit_tl tcg_gen_deposit_i64
+#define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i64
 #define tcg_gen_extract_tl tcg_gen_extract_i64
 #define tcg_gen_sextract_tl tcg_gen_sextract_i64
 #define tcg_const_tl tcg_const_i64
@@ -1049,6 +1054,7 @@ void tcg_gen_atomic_xor_fetch_i64(TCGv_i64, TCGv, TCGv_i64, TCGArg, TCGMemOp);
 #define tcg_gen_rotr_tl tcg_gen_rotr_i32
 #define tcg_gen_rotri_tl tcg_gen_rotri_i32
 #define tcg_gen_deposit_tl tcg_gen_deposit_i32
+#define tcg_gen_deposit_z_tl tcg_gen_deposit_z_i32
 #define tcg_gen_extract_tl tcg_gen_extract_i32
 #define tcg_gen_sextract_tl tcg_gen_sextract_i32
 #define tcg_const_tl tcg_const_i32
-- 
2.9.3

  parent reply	other threads:[~2017-01-11  2:18 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-11  2:17 [Qemu-devel] [PULL 00/65] tcg 2.9 patch queue Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 01/65] tcg: Add field extraction primitives Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 02/65] tcg: Minor adjustments to deposit expanders Richard Henderson
2017-01-11  2:17 ` Richard Henderson [this message]
2017-01-11  2:17 ` [Qemu-devel] [PULL 04/65] tcg/aarch64: Implement field extraction opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 05/65] tcg/arm: Move isa detection to tcg-target.h Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 06/65] tcg/arm: Implement field extraction opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 07/65] tcg/i386: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 08/65] tcg/mips: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 09/65] tcg/ppc: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 10/65] tcg/s390: Expose host facilities to tcg-target.h Richard Henderson
2017-01-13  9:18   ` Christian Borntraeger
2017-01-16  8:28     ` Christian Borntraeger
2017-01-11  2:17 ` [Qemu-devel] [PULL 11/65] tcg/s390: Implement field extraction opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 12/65] tcg/s390: Support deposit into zero Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 13/65] target-alpha: Use deposit and extract ops Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 14/65] target-arm: Use new " Richard Henderson
2017-01-14 19:41   ` Laszlo Ersek
2017-01-14 20:13     ` Richard Henderson
2017-01-16 23:05       ` Laszlo Ersek
2017-01-11  2:17 ` [Qemu-devel] [PULL 15/65] target-i386: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 16/65] target-mips: Use the new extract op Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 17/65] target-ppc: Use the new deposit and extract ops Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 18/65] target-s390x: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 19/65] tcg/optimize: Fold movcond 0/1 into setcond Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 20/65] tcg: Add markup for output requires new register Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 21/65] tcg: Transition flat op_defs array to a target callback Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 22/65] tcg: Pass the opcode width to target_parse_constraint Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 23/65] tcg: Allow an operand to be matching or a constant Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 24/65] tcg: Add clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 25/65] disas/i386.c: Handle tzcnt Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 26/65] disas/ppc: Handle popcnt and cnttz Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 27/65] target-alpha: Use the ctz and clz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 28/65] target-cris: Use clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 29/65] target-microblaze: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 30/65] target-mips: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 31/65] target-openrisc: Use clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 32/65] target-ppc: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 33/65] target-s390x: Use clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 34/65] target-tilegx: Use clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 35/65] target-tricore: Use clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 36/65] target-unicore32: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 37/65] target-xtensa: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 38/65] target-arm: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 39/65] target-i386: Use clz and ctz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 40/65] tcg/ppc: Handle ctz and clz opcodes Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 41/65] tcg/aarch64: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 42/65] tcg/arm: " Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 43/65] tcg/mips: Handle clz opcode Richard Henderson
2017-01-11  2:17 ` [Qemu-devel] [PULL 44/65] tcg/s390: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 45/65] tcg/i386: Fuly convert tcg_target_op_def Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 46/65] tcg/i386: Hoist common arguments in tcg_out_op Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 47/65] tcg/i386: Allow bmi2 shiftx to have non-matching operands Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 48/65] tcg/i386: Handle ctz and clz opcodes Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 49/65] tcg/i386: Rely on undefined/undocumented behaviour of BSF/BSR Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 50/65] tcg: Add helpers for clrsb Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 51/65] target-arm: Use clrsb helper Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 52/65] target-tricore: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 53/65] target-xtensa: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 54/65] tcg: Add opcode for ctpop Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 55/65] target-alpha: Use ctpop helper Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 56/65] target-ppc: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 57/65] target-s390x: Avoid a loop for popcnt Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 58/65] target-sparc: Use ctpop helper Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 59/65] target-tilegx: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 60/65] target-i386: " Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 61/65] qemu/host-utils.h: Reduce the operation count in the fallback ctpop Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 62/65] tests: New test-bitcnt Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 63/65] tcg: Use ctpop to generate ctz if needed Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 64/65] tcg/ppc: Handle ctpop opcode Richard Henderson
2017-01-11  2:18 ` [Qemu-devel] [PULL 65/65] tcg/i386: " Richard Henderson
2017-01-11  3:39 ` [Qemu-devel] [PULL 00/65] tcg 2.9 patch queue no-reply
2017-01-12 15:57 ` Peter Maydell

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170111021820.24416-4-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).