From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Subject: [PATCH 12/43] tcg: Increase tcg_out_dupi_vec immediate to int64_t
Date: Tue, 8 Sep 2020 17:16:16 -0700 [thread overview]
Message-ID: <20200909001647.532249-13-richard.henderson@linaro.org> (raw)
In-Reply-To: <20200909001647.532249-1-richard.henderson@linaro.org>
While we don't store more than tcg_target_long in TCGTemp,
we shouldn't be limited to that for code generation. We will
be able to use this for INDEX_op_dup2_vec with 2 constants.
Also pass along the minimal vece, i.e. the smallest vector element
size at which the constant is a pure replication. This allows some
simplification in the various backends.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/tcg.c | 31 +++++++++++++++++++++++++-----
tcg/aarch64/tcg-target.c.inc | 12 ++++++------
tcg/i386/tcg-target.c.inc | 22 ++++++++++++---------
tcg/ppc/tcg-target.c.inc | 37 +++++++++++++++++++++++-------------
4 files changed, 69 insertions(+), 33 deletions(-)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 84bdcc6537..6474a695f2 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -116,8 +116,8 @@ static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
TCGReg dst, TCGReg base, intptr_t offset);
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
- TCGReg dst, tcg_target_long arg);
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
unsigned vece, const TCGArg *args,
const int *const_args);
@@ -132,8 +132,8 @@ static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
{
g_assert_not_reached();
}
-static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
- TCGReg dst, tcg_target_long arg)
+static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg dst, int64_t arg)
{
g_assert_not_reached();
}
@@ -3308,7 +3308,28 @@ static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
if (ts->type <= TCG_TYPE_I64) {
tcg_out_movi(s, ts->type, reg, ts->val);
} else {
- tcg_out_dupi_vec(s, ts->type, reg, ts->val);
+ uint64_t val = ts->val;
+ MemOp vece = MO_64;
+
+ /*
+ * Find the minimal vector element that matches the constant.
+ * The targets will, in general, have to do this search anyway,
+ * do this generically.
+ */
+ if (TCG_TARGET_REG_BITS == 32) {
+ val = dup_const(MO_32, val);
+ vece = MO_32;
+ }
+ if (val == dup_const(MO_8, val)) {
+ vece = MO_8;
+ } else if (val == dup_const(MO_16, val)) {
+ vece = MO_16;
+ } else if (TCG_TARGET_REG_BITS == 64 &&
+ val == dup_const(MO_32, val)) {
+ vece = MO_32;
+ }
+
+ tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
}
ts->mem_coherent = 0;
break;
diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
index 72c4d25fdb..5692607087 100644
--- a/tcg/aarch64/tcg-target.c.inc
+++ b/tcg/aarch64/tcg-target.c.inc
@@ -853,14 +853,14 @@ static void tcg_out_logicali(TCGContext *s, AArch64Insn insn, TCGType ext,
tcg_out_insn_3404(s, insn, ext, rd, rn, ext, r, c);
}
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
- TCGReg rd, tcg_target_long v64)
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg rd, int64_t v64)
{
bool q = type == TCG_TYPE_V128;
int cmode, imm8, i;
/* Test all bytes equal first. */
- if (v64 == dup_const(MO_8, v64)) {
+ if (vece == MO_8) {
imm8 = (uint8_t)v64;
tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0xe, imm8);
return;
@@ -887,7 +887,7 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
* cannot find an expansion there's no point checking a larger
* width because we already know by replication it cannot match.
*/
- if (v64 == dup_const(MO_16, v64)) {
+ if (vece == MO_16) {
uint16_t v16 = v64;
if (is_shimm16(v16, &cmode, &imm8)) {
@@ -906,7 +906,7 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
tcg_out_insn(s, 3606, MOVI, q, rd, 0, 0x8, v16 & 0xff);
tcg_out_insn(s, 3606, ORR, q, rd, 0, 0xa, v16 >> 8);
return;
- } else if (v64 == dup_const(MO_32, v64)) {
+ } else if (vece == MO_32) {
uint32_t v32 = v64;
uint32_t n32 = ~v32;
@@ -2430,7 +2430,7 @@ static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
tcg_out_insn_3617(s, insn, is_q, vece, a0, a1);
break;
}
- tcg_out_dupi_vec(s, type, TCG_VEC_TMP, 0);
+ tcg_out_dupi_vec(s, type, MO_8, TCG_VEC_TMP, 0);
a2 = TCG_VEC_TMP;
}
insn = cmp_insn[cond];
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index e2852cbb09..6b7cbaa47a 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -933,8 +933,8 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
return true;
}
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
- TCGReg ret, tcg_target_long arg)
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg ret, int64_t arg)
{
int vex_l = (type == TCG_TYPE_V256 ? P_VEXL : 0);
@@ -947,7 +947,14 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
return;
}
- if (TCG_TARGET_REG_BITS == 64) {
+ if (TCG_TARGET_REG_BITS == 32 && vece < MO_64) {
+ if (have_avx2) {
+ tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
+ } else {
+ tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
+ }
+ new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
+ } else {
if (type == TCG_TYPE_V64) {
tcg_out_vex_modrm_pool(s, OPC_MOVQ_VqWq, ret);
} else if (have_avx2) {
@@ -955,14 +962,11 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
} else {
tcg_out_vex_modrm_pool(s, OPC_MOVDDUP, ret);
}
- new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
- } else {
- if (have_avx2) {
- tcg_out_vex_modrm_pool(s, OPC_VPBROADCASTD + vex_l, ret);
+ if (TCG_TARGET_REG_BITS == 64) {
+ new_pool_label(s, arg, R_386_PC32, s->code_ptr - 4, -4);
} else {
- tcg_out_vex_modrm_pool(s, OPC_VBROADCASTSS, ret);
+ new_pool_l2(s, R_386_32, s->code_ptr - 4, 0, arg, arg >> 32);
}
- new_pool_label(s, arg, R_386_32, s->code_ptr - 4, 0);
}
}
diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
index bc6fb09adb..ff56f1971f 100644
--- a/tcg/ppc/tcg-target.c.inc
+++ b/tcg/ppc/tcg-target.c.inc
@@ -908,31 +908,41 @@ static void tcg_out_movi_int(TCGContext *s, TCGType type, TCGReg ret,
}
}
-static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
- tcg_target_long val)
+static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
+ TCGReg ret, int64_t val)
{
uint32_t load_insn;
int rel, low;
intptr_t add;
- low = (int8_t)val;
- if (low >= -16 && low < 16) {
- if (val == (tcg_target_long)dup_const(MO_8, low)) {
+ switch (vece) {
+ case MO_8:
+ low = (int8_t)val;
+ if (low >= -16 && low < 16) {
tcg_out32(s, VSPLTISB | VRT(ret) | ((val & 31) << 16));
return;
}
- if (val == (tcg_target_long)dup_const(MO_16, low)) {
+ if (have_isa_3_00) {
+ tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
+ return;
+ }
+ break;
+
+ case MO_16:
+ low = (int16_t)val;
+ if (low >= -16 && low < 16) {
tcg_out32(s, VSPLTISH | VRT(ret) | ((val & 31) << 16));
return;
}
- if (val == (tcg_target_long)dup_const(MO_32, low)) {
+ break;
+
+ case MO_32:
+ low = (int32_t)val;
+ if (low >= -16 && low < 16) {
tcg_out32(s, VSPLTISW | VRT(ret) | ((val & 31) << 16));
return;
}
- }
- if (have_isa_3_00 && val == (tcg_target_long)dup_const(MO_8, val)) {
- tcg_out32(s, XXSPLTIB | VRT(ret) | ((val & 0xff) << 11));
- return;
+ break;
}
/*
@@ -952,14 +962,15 @@ static void tcg_out_dupi_vec(TCGContext *s, TCGType type, TCGReg ret,
if (TCG_TARGET_REG_BITS == 64) {
new_pool_label(s, val, rel, s->code_ptr, add);
} else {
- new_pool_l2(s, rel, s->code_ptr, add, val, val);
+ new_pool_l2(s, rel, s->code_ptr, add, val >> 32, val);
}
} else {
load_insn = LVX | VRT(ret) | RB(TCG_REG_TMP1);
if (TCG_TARGET_REG_BITS == 64) {
new_pool_l2(s, rel, s->code_ptr, add, val, val);
} else {
- new_pool_l4(s, rel, s->code_ptr, add, val, val, val, val);
+ new_pool_l4(s, rel, s->code_ptr, add,
+ val >> 32, val, val >> 32, val);
}
}
--
2.25.1
next prev parent reply other threads:[~2020-09-09 0:21 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-09-09 0:16 [PATCH 00/43] tcg patch queue Richard Henderson
2020-09-09 0:16 ` [PATCH 01/43] tcg: Adjust simd_desc size encoding Richard Henderson
2020-09-09 0:16 ` [PATCH 02/43] tcg: Drop union from TCGArgConstraint Richard Henderson
2020-09-09 17:43 ` Philippe Mathieu-Daudé
2020-09-09 0:16 ` [PATCH 03/43] tcg: Move sorted_args into TCGArgConstraint.sort_index Richard Henderson
2020-09-09 0:16 ` [PATCH 04/43] tcg: Remove TCG_CT_REG Richard Henderson
2020-09-09 0:16 ` [PATCH 05/43] tcg: Move some TCG_CT_* bits to TCGArgConstraint bitfields Richard Henderson
2020-09-09 0:16 ` [PATCH 06/43] tcg: Remove TCGOpDef.used Richard Henderson
2020-09-09 17:45 ` Philippe Mathieu-Daudé
2020-09-09 0:16 ` [PATCH 07/43] tcg/i386: Fix dupi for avx2 32-bit hosts Richard Henderson
2020-09-09 0:16 ` [PATCH 08/43] tcg: Fix generation of dupi_vec for 32-bit host Richard Henderson
2020-09-09 0:16 ` [PATCH 09/43] tcg/optimize: Fold dup2_vec Richard Henderson
2020-09-09 0:16 ` [PATCH 10/43] tcg: Remove TCG_TARGET_HAS_cmp_vec Richard Henderson
2020-09-09 17:47 ` Philippe Mathieu-Daudé
2020-09-09 0:16 ` [PATCH 11/43] tcg: Use tcg_out_dupi_vec from temp_load Richard Henderson
2020-09-09 0:16 ` Richard Henderson [this message]
2020-09-09 0:16 ` [PATCH 13/43] tcg: Consolidate 3 bits into enum TCGTempKind Richard Henderson
2020-09-09 17:52 ` Philippe Mathieu-Daudé
2020-09-09 0:16 ` [PATCH 14/43] tcg: Add temp_readonly Richard Henderson
2020-09-09 0:16 ` [PATCH 15/43] tcg: Expand TCGTemp.val to 64-bits Richard Henderson
2020-09-09 0:16 ` [PATCH 16/43] tcg: Rename struct tcg_temp_info to TempOptInfo Richard Henderson
2020-09-09 0:16 ` [PATCH 17/43] tcg: Expand TempOptInfo to 64-bits Richard Henderson
2020-09-09 0:16 ` [PATCH 18/43] tcg: Introduce TYPE_CONST temporaries Richard Henderson
2020-09-09 0:16 ` [PATCH 19/43] tcg/optimize: Improve find_better_copy Richard Henderson
2020-09-09 0:16 ` [PATCH 20/43] tcg/optimize: Adjust TempOptInfo allocation Richard Henderson
2020-09-09 0:16 ` [PATCH 21/43] tcg/optimize: Use tcg_constant_internal with constant folding Richard Henderson
2020-09-09 0:16 ` [PATCH 22/43] tcg: Convert tcg_gen_dupi_vec to TCG_CONST Richard Henderson
2020-09-09 0:16 ` [PATCH 23/43] tcg: Use tcg_constant_i32 with icount expander Richard Henderson
2020-09-09 0:16 ` [PATCH 24/43] tcg: Use tcg_constant_{i32,i64} with tcg int expanders Richard Henderson
2020-09-09 0:16 ` [PATCH 25/43] tcg: Use tcg_constant_{i32,i64} with tcg plugins Richard Henderson
2020-09-09 0:16 ` [PATCH 26/43] tcg: Use tcg_constant_{i32, i64, vec} with gvec expanders Richard Henderson
2020-09-09 0:16 ` [PATCH 27/43] tcg/tci: Add special tci_movi_{i32,i64} opcodes Richard Henderson
2020-09-09 0:16 ` [PATCH 28/43] tcg: Remove movi and dupi opcodes Richard Henderson
2020-09-09 0:16 ` [PATCH 29/43] tcg: Add tcg_reg_alloc_dup2 Richard Henderson
2020-09-09 0:16 ` [PATCH 30/43] tcg/i386: Use tcg_constant_vec with tcg vec expanders Richard Henderson
2020-09-09 0:16 ` [PATCH 31/43] tcg: Remove tcg_gen_dup{8,16,32,64}i_vec Richard Henderson
2020-09-09 0:16 ` [PATCH 32/43] tcg/ppc: Use tcg_constant_vec with tcg vec expanders Richard Henderson
2020-09-09 0:16 ` [PATCH 33/43] tcg/aarch64: " Richard Henderson
2020-09-09 0:16 ` [PATCH 34/43] tcg: Add tcg-constr.c.inc Richard Henderson
2020-09-09 0:16 ` [PATCH 35/43] tcg/i386: Convert to tcg-constr.c.inc Richard Henderson
2020-09-09 0:16 ` [PATCH 36/43] tcg/aarch64: " Richard Henderson
2020-09-09 0:16 ` [PATCH 37/43] tcg/arm: " Richard Henderson
2020-09-09 0:16 ` [PATCH 38/43] tcg/mips: " Richard Henderson
2020-09-09 0:16 ` [PATCH 39/43] tcg/ppc: " Richard Henderson
2020-09-09 0:16 ` [PATCH 40/43] tcg/riscv: " Richard Henderson
2020-09-09 0:16 ` [PATCH 41/43] tcg/s390: " Richard Henderson
2020-09-09 0:16 ` [PATCH 42/43] tcg/sparc: " Richard Henderson
2020-09-09 0:16 ` [PATCH 43/43] tcg/tci: " Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200909001647.532249-13-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).