From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PATCH 2/5] tcg/i386: Simplify immediate 8-bit logical vector shifts
Date: Wed, 24 Apr 2024 10:09:05 -0700 [thread overview]
Message-ID: <20240424170908.759043-4-richard.henderson@linaro.org> (raw)
In-Reply-To: <20240424170908.759043-1-richard.henderson@linaro.org>
The x86 ISA has no 8-bit vector shift instructions, so we need an expansion.
Use the same algorithm that we use for expanding this vector
operation with integers: perform the shift with a wider type
and then mask the bits that must be zero.
This reduces the instruction count from 5 to 2.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/i386/tcg-target.c.inc | 61 +++++++++------------------------------
1 file changed, 14 insertions(+), 47 deletions(-)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index c6ba498623..6837c519b0 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -3769,49 +3769,20 @@ int tcg_can_emit_vec_op(TCGOpcode opc, TCGType type, unsigned vece)
}
}
-static void expand_vec_shi(TCGType type, unsigned vece, TCGOpcode opc,
+static void expand_vec_shi(TCGType type, unsigned vece, bool right,
TCGv_vec v0, TCGv_vec v1, TCGArg imm)
{
- TCGv_vec t1, t2;
+ uint8_t mask;
tcg_debug_assert(vece == MO_8);
-
- t1 = tcg_temp_new_vec(type);
- t2 = tcg_temp_new_vec(type);
-
- /*
- * Unpack to W, shift, and repack. Tricky bits:
- * (1) Use punpck*bw x,x to produce DDCCBBAA,
- * i.e. duplicate in other half of the 16-bit lane.
- * (2) For right-shift, add 8 so that the high half of the lane
- * becomes zero. For left-shift, and left-rotate, we must
- * shift up and down again.
- * (3) Step 2 leaves high half zero such that PACKUSWB
- * (pack with unsigned saturation) does not modify
- * the quantity.
- */
- vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
- tcgv_vec_arg(t1), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
- vec_gen_3(INDEX_op_x86_punpckh_vec, type, MO_8,
- tcgv_vec_arg(t2), tcgv_vec_arg(v1), tcgv_vec_arg(v1));
-
- if (opc != INDEX_op_rotli_vec) {
- imm += 8;
- }
- if (opc == INDEX_op_shri_vec) {
- tcg_gen_shri_vec(MO_16, t1, t1, imm);
- tcg_gen_shri_vec(MO_16, t2, t2, imm);
+ if (right) {
+ mask = 0xff >> imm;
+ tcg_gen_shri_vec(MO_16, v0, v1, imm);
} else {
- tcg_gen_shli_vec(MO_16, t1, t1, imm);
- tcg_gen_shli_vec(MO_16, t2, t2, imm);
- tcg_gen_shri_vec(MO_16, t1, t1, 8);
- tcg_gen_shri_vec(MO_16, t2, t2, 8);
+ mask = 0xff << imm;
+ tcg_gen_shli_vec(MO_16, v0, v1, imm);
}
-
- vec_gen_3(INDEX_op_x86_packus_vec, type, MO_8,
- tcgv_vec_arg(v0), tcgv_vec_arg(t1), tcgv_vec_arg(t2));
- tcg_temp_free_vec(t1);
- tcg_temp_free_vec(t2);
+ tcg_gen_and_vec(MO_8, v0, v0, tcg_constant_vec(type, MO_8, mask));
}
static void expand_vec_sari(TCGType type, unsigned vece,
@@ -3821,7 +3792,7 @@ static void expand_vec_sari(TCGType type, unsigned vece,
switch (vece) {
case MO_8:
- /* Unpack to W, shift, and repack, as in expand_vec_shi. */
+ /* Unpack to 16-bit, shift, and repack. */
t1 = tcg_temp_new_vec(type);
t2 = tcg_temp_new_vec(type);
vec_gen_3(INDEX_op_x86_punpckl_vec, type, MO_8,
@@ -3874,12 +3845,7 @@ static void expand_vec_rotli(TCGType type, unsigned vece,
{
TCGv_vec t;
- if (vece == MO_8) {
- expand_vec_shi(type, vece, INDEX_op_rotli_vec, v0, v1, imm);
- return;
- }
-
- if (have_avx512vbmi2) {
+ if (vece != MO_8 && have_avx512vbmi2) {
vec_gen_4(INDEX_op_x86_vpshldi_vec, type, vece,
tcgv_vec_arg(v0), tcgv_vec_arg(v1), tcgv_vec_arg(v1), imm);
return;
@@ -4155,10 +4121,11 @@ void tcg_expand_vec_op(TCGOpcode opc, TCGType type, unsigned vece,
switch (opc) {
case INDEX_op_shli_vec:
- case INDEX_op_shri_vec:
- expand_vec_shi(type, vece, opc, v0, v1, a2);
+ expand_vec_shi(type, vece, false, v0, v1, a2);
+ break;
+ case INDEX_op_shri_vec:
+ expand_vec_shi(type, vece, true, v0, v1, a2);
break;
-
case INDEX_op_sari_vec:
expand_vec_sari(type, vece, v0, v1, a2);
break;
--
2.34.1
next prev parent reply other threads:[~2024-04-24 17:10 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-24 17:09 [PATCH 0/5] tcg: Misc improvements Richard Henderson
2024-04-24 17:09 ` [PATCH] target/arm: Restrict translation disabled alignment check to VMSA Richard Henderson
2024-05-03 14:58 ` Philippe Mathieu-Daudé
2024-05-03 14:59 ` Richard Henderson
2024-04-24 17:09 ` [PATCH 1/5] tcg: Add write_aofs to GVecGen3i Richard Henderson
2024-05-03 15:01 ` Philippe Mathieu-Daudé
2024-04-24 17:09 ` Richard Henderson [this message]
2024-04-24 17:09 ` [PATCH 3/5] tcg/i386: Optimize setcond of TST{EQ,NE} with 0xffffffff Richard Henderson
2024-05-03 15:04 ` Philippe Mathieu-Daudé
2024-04-24 17:09 ` [PATCH 4/5] tcg/optimize: Optimize setcond with zmask Richard Henderson
2024-04-24 17:09 ` [PATCH 5/5] accel/tcg: Introduce CF_BP_PAGE Richard Henderson
2024-05-03 15:02 ` Philippe Mathieu-Daudé
2024-05-02 19:34 ` [PATCH 0/5] tcg: Misc improvements Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240424170908.759043-4-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).