From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: alex.bennee@linaro.org
Subject: [PATCH v2 32/36] tcg/i386: Implement INDEX_op_rotl[is]_vec
Date: Tue, 21 Apr 2020 18:17:18 -0700 [thread overview]
Message-ID: <20200422011722.13287-33-richard.henderson@linaro.org> (raw)
In-Reply-To: <20200422011722.13287-1-richard.henderson@linaro.org>
We must continue to special-case 8-bit elements; the other element
sizes are trivially implemented with shifts.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/i386/tcg-target.inc.c | 85 +++++++++++++++++++++++++++++++--------
1 file changed, 69 insertions(+), 16 deletions(-)
diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
index deace219d2..6039ae4fc6 100644
--- a/tcg/i386/tcg-target.inc.c
+++ b/tcg/i386/tcg-target.inc.c
@@ -3255,6 +3255,7 @@ static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode op)
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
case INDEX_op_sars_vec:
+ case INDEX_op_rotls_vec:
case INDEX_op_cmp_vec:
case INDEX_op_x86_shufps_vec:
case INDEX_op_x86_blend_vec:
@@ -3293,6 +3294,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_xor_vec:
case INDEX_op_andc_vec:
return 1;
+ case INDEX_op_rotli_vec:
case INDEX_op_cmp_vec:
case INDEX_op_cmpsel_vec:
return -1;
@@ -3316,6 +3318,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
case INDEX_op_shls_vec:
case INDEX_op_shrs_vec:
+ case INDEX_op_rotls_vec:
return vece >= MO_16;
case INDEX_op_sars_vec:
return vece >= MO_16 && vece <= MO_32;
@@ -3353,7 +3356,7 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
}
}
-static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
+static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
{
TCGv_vec t1, t2;
@@ -3363,26 +3366,31 @@ static void expand_vec_shi(TCGType type, unsigned vece, bool shr,
t1 = tcg_temp_new_vec(type);
t2 = tcg_temp_new_vec(type);
- /* Unpack to W, shift, and repack. Tricky bits:
- (1) Use punpck*bw x,x to produce DDCCBBAA,
- i.e. duplicate in other half of the 16-bit lane.
- (2) For right-shift, add 8 so that the high half of
- the lane becomes zero. For left-shift, we must
- shift up and down again.
- (3) Step 2 leaves high half zero such that PACKUSWB
- (pack with unsigned saturation) does not modify
- the quantity. */
+ /*
+ * Unpack to W, shift, and repack. Tricky bits:
+ * (1) Use punpck*bw x,x to produce DDCCBBAA,
+ * i.e. duplicate in other half of the 16-bit lane.
+ * (2) For right-shift, add 8 so that the high half of the lane
+ * becomes zero. For left-shift (count plus 8) and left-rotate
+ * (count as given), we must shift up and then down again by 8.
+ * (3) Step 2 leaves high half zero such that PACKUSWB
+ * (pack with unsigned saturation) does not modify
+ * the quantity.
+ */
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
- if (shr) {
- tcg_gen_shri_vec(MO_16, t1, t1, imm + 8);
- tcg_gen_shri_vec(MO_16, t2, t2, imm + 8);
+ if (opc != INDEX_op_rotli_vec) {
+ imm += 8;
+ }
+ if (opc == INDEX_op_shri_vec) {
+ tcg_gen_shri_vec(MO_16, t1, t1, imm);
+ tcg_gen_shri_vec(MO_16, t2, t2, imm);
} else {
- tcg_gen_shli_vec(MO_16, t1, t1, imm + 8);
- tcg_gen_shli_vec(MO_16, t2, t2, imm + 8);
+ tcg_gen_shli_vec(MO_16, t1, t1, imm);
+ tcg_gen_shli_vec(MO_16, t2, t2, imm);
tcg_gen_shri_vec(MO_16, t1, t1, 8);
tcg_gen_shri_vec(MO_16, t2, t2, 8);
}
@@ -3449,6 +3457,43 @@ static void expand_vec_sari(TCGType type, unsigned vece,
}
}
+static void expand_vec_rotli(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGArg imm)
+{ /* Expand rotli_vec: v0 = each element of v1 rotated left by the immediate imm. */
+ TCGv_vec t; /* temporary for the left-shifted part of the rotate */
+ /* 8-bit elements are special-cased: delegate to expand_vec_shi. */
+ if (vece == MO_8) {
+ expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
+ return;
+ }
+ /* Wider elements: (v1 << imm) | (v1 >> (width - imm)), with width = 8 << vece bits. */
+ t = tcg_temp_new_vec(type);
+ tcg_gen_shli_vec(vece, t, v1, imm);
+ tcg_gen_shri_vec(vece, v0, v1, (8 << vece) - imm); /* NOTE(review): imm == 0 would make this count equal the element width; presumably filtered by the caller - confirm */
+ tcg_gen_or_vec(vece, v0, v0, t);
+ tcg_temp_free_vec(t);
+}
+
+static void expand_vec_rotls(TCGType type, unsigned vece,
+ TCGv_vec v0, TCGv_vec v1, TCGv_i32 lsh)
+{ /* Expand rotls_vec: v0 = each element of v1 rotated left by the scalar count lsh. */
+ TCGv_i32 rsh; /* complementary right-shift count derived from lsh */
+ TCGv_vec t; /* temporary for the left-shifted part of the rotate */
+ /* tcg_can_emit_vec_op only accepts rotls_vec for vece >= MO_16. */
+ tcg_debug_assert(vece != MO_8);
+
+ t = tcg_temp_new_vec(type);
+ rsh = tcg_temp_new_i32();
+ /* rsh = (-lsh) masked to the element width (8 << vece bits); lsh == 0 gives rsh == 0. */
+ tcg_gen_neg_i32(rsh, lsh);
+ tcg_gen_andi_i32(rsh, rsh, (8 << vece) - 1);
+ tcg_gen_shls_vec(vece, t, v1, lsh);
+ tcg_gen_shrs_vec(vece, v0, v1, rsh);
+ tcg_gen_or_vec(vece, v0, v0, t); /* combine the two shifted parts into the rotate result */
+ tcg_temp_free_vec(t);
+ tcg_temp_free_i32(rsh);
+}
+
static void expand_vec_mul(TCGType type, unsigned vece,
TCGv_vec v0, TCGv_vec v1, TCGv_vec v2)
{
@@ -3658,13 +3703,21 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
switch (opc) {
case INDEX_op_shli_vec:
case INDEX_op_shri_vec:
- expand_vec_shi(type, vece, opc == INDEX_op_shri_vec, v0, v1, a2);
+ expand_vec_shi(type, vece, opc, v0, v1, a2);
break;
case INDEX_op_sari_vec:
expand_vec_sari(type, vece, v0, v1, a2);
break;
+ case INDEX_op_rotli_vec:
+ expand_vec_rotli(type, vece, v0, v1, a2);
+ break;
+
+ case INDEX_op_rotls_vec:
+ expand_vec_rotls(type, vece, v0, v1, temp_tcgv_i32(arg_temp(a2)));
+ break;
+
case INDEX_op_mul_vec:
v2 = temp_tcgv_vec(arg_temp(a2));
expand_vec_mul(type, vece, v0, v1, v2);
--
2.20.1
next prev parent reply other threads:[~2020-04-22 1:35 UTC|newest]
Thread overview: 75+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-22 1:16 [PATCH v2 00/36] tcg 5.1 omnibus patch set Richard Henderson
2020-04-22 1:16 ` [PATCH v2 01/36] tcg: Add tcg_gen_gvec_dup_imm Richard Henderson
2020-04-22 1:16 ` [PATCH v2 02/36] target/s390x: Use tcg_gen_gvec_dup_imm Richard Henderson
2020-04-22 1:16 ` [PATCH v2 03/36] target/ppc: " Richard Henderson
2020-04-22 1:16 ` [PATCH v2 04/36] target/arm: " Richard Henderson
2020-04-22 1:16 ` [PATCH v2 05/36] tcg: Use tcg_gen_gvec_dup_imm in logical simplifications Richard Henderson
2020-04-22 1:16 ` [PATCH v2 06/36] tcg: Remove tcg_gen_gvec_dup{8,16,32,64}i Richard Henderson
2020-04-22 1:16 ` [PATCH v2 07/36] tcg: Add tcg_gen_gvec_dup_tl Richard Henderson
2020-04-22 1:16 ` [PATCH v2 08/36] tcg: Improve vector tail clearing Richard Henderson
2020-04-22 1:16 ` [PATCH v2 09/36] tcg: Consolidate 3 bits into enum TCGTempKind Richard Henderson
2020-04-22 11:25 ` Alex Bennée
2020-04-22 19:58 ` Aleksandar Markovic
2020-04-23 9:00 ` Philippe Mathieu-Daudé
2020-04-23 15:40 ` Richard Henderson
2020-04-23 17:24 ` Daniel P. Berrangé
2020-04-23 23:11 ` Richard Henderson
2020-04-24 9:08 ` Daniel P. Berrangé
2020-04-22 1:16 ` [PATCH v2 10/36] tcg: Add temp_readonly Richard Henderson
2020-04-22 11:26 ` Alex Bennée
2020-04-22 1:16 ` [PATCH v2 11/36] tcg: Introduce TYPE_CONST temporaries Richard Henderson
2020-04-22 15:17 ` Alex Bennée
2020-04-22 16:55 ` Richard Henderson
2020-04-22 1:16 ` [PATCH v2 12/36] tcg: Use tcg_constant_i32 with icount expander Richard Henderson
2020-04-22 15:40 ` Alex Bennée
2020-04-22 1:16 ` [PATCH v2 13/36] tcg: Use tcg_constant_{i32, i64} with tcg int expanders Richard Henderson
2020-04-22 16:18 ` [PATCH v2 13/36] tcg: Use tcg_constant_{i32,i64} " Alex Bennée
2020-04-22 17:02 ` Richard Henderson
2020-04-22 17:57 ` Alex Bennée
2020-04-22 20:04 ` Alex Bennée
2020-04-23 23:13 ` Richard Henderson
2020-04-24 13:23 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 14/36] tcg: Use tcg_constant_{i32, vec} with tcg vec expanders Richard Henderson
2020-04-22 17:00 ` [PATCH v2 14/36] tcg: Use tcg_constant_{i32,vec} " Alex Bennée
2020-04-22 1:17 ` [PATCH v2 15/36] tcg: Use tcg_constant_{i32,i64} with tcg plugins Richard Henderson
2020-04-22 17:18 ` [PATCH v2 15/36] tcg: Use tcg_constant_{i32, i64} " Alex Bennée
2020-04-22 1:17 ` [PATCH v2 16/36] tcg: Rename struct tcg_temp_info to TempOptInfo Richard Henderson
2020-04-22 17:19 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 17/36] tcg/optimize: Adjust TempOptInfo allocation Richard Henderson
2020-04-22 17:53 ` Alex Bennée
2020-04-22 18:28 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 18/36] tcg/optimize: Use tcg_constant_internal with constant folding Richard Henderson
2020-04-22 18:28 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 19/36] tcg/tci: Add special tci_movi_{i32,i64} opcodes Richard Henderson
2020-04-22 19:02 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 20/36] tcg: Remove movi and dupi opcodes Richard Henderson
2020-04-22 9:12 ` Aleksandar Markovic
2020-04-22 19:03 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 21/36] tcg: Use tcg_out_dupi_vec from temp_load Richard Henderson
2020-04-22 19:28 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 22/36] tcg: Increase tcg_out_dupi_vec immediate to int64_t Richard Henderson
2020-04-22 19:33 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 23/36] tcg: Add tcg_reg_alloc_dup2 Richard Henderson
2020-04-22 19:40 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 24/36] tcg/i386: Use tcg_constant_vec with tcg vec expanders Richard Henderson
2020-04-22 19:43 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 25/36] tcg: Remove tcg_gen_dup{8,16,32,64}i_vec Richard Henderson
2020-04-23 9:11 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 26/36] tcg: Add load_dest parameter to GVecGen2 Richard Henderson
2020-04-23 9:37 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 27/36] tcg: Fix integral argument type to tcg_gen_rot[rl]i_i{32, 64} Richard Henderson
2020-04-22 10:19 ` Philippe Mathieu-Daudé
2020-04-23 9:38 ` [PATCH v2 27/36] tcg: Fix integral argument type to tcg_gen_rot[rl]i_i{32,64} Alex Bennée
2020-04-22 1:17 ` [PATCH v2 28/36] tcg: Implement gvec support for rotate by immediate Richard Henderson
2020-04-23 13:28 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 29/36] tcg: Implement gvec support for rotate by vector Richard Henderson
2020-04-23 13:41 ` Alex Bennée
2020-04-22 1:17 ` [PATCH v2 30/36] tcg: Remove expansion to shift by vector from do_shifts Richard Henderson
2020-04-22 1:17 ` [PATCH v2 31/36] tcg: Implement gvec support for rotate by scalar Richard Henderson
2020-04-23 13:46 ` Alex Bennée
2020-04-22 1:17 ` Richard Henderson [this message]
2020-04-22 1:17 ` [PATCH v2 33/36] tcg/aarch64: Implement INDEX_op_rotli_vec Richard Henderson
2020-04-22 1:17 ` [PATCH v2 34/36] tcg/ppc: Implement INDEX_op_rot[lr]v_vec Richard Henderson
2020-04-22 1:17 ` [PATCH v2 35/36] target/ppc: Use tcg_gen_gvec_rotlv Richard Henderson
2020-04-22 1:17 ` [PATCH v2 36/36] target/s390x: Use tcg_gen_gvec_rotl{i,s,v} Richard Henderson
2020-04-23 13:50 ` [PATCH v2 00/36] tcg 5.1 omnibus patch set Alex Bennée
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200422011722.13287-33-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=alex.bennee@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).