* [PATCH 0/2] tcg: Add gvec compare with immediate and scalar operand
@ 2023-08-31 3:09 Richard Henderson
2023-08-31 3:09 ` [PATCH 1/2] " Richard Henderson
2023-08-31 3:09 ` [PATCH 2/2] target/arm: Use tcg_gen_gvec_cmpi for compare vs 0 Richard Henderson
0 siblings, 2 replies; 7+ messages in thread
From: Richard Henderson @ 2023-08-31 3:09 UTC (permalink / raw)
To: qemu-devel; +Cc: gaosong
This should be usable for loongarch64 as well, rather than
rolling a local copy there.
r~
Richard Henderson (2):
tcg: Add gvec compare with immediate and scalar operand
target/arm: Use tcg_gen_gvec_cmpi for compare vs 0
accel/tcg/tcg-runtime.h | 25 ++++++
include/tcg/tcg-op-gvec-common.h | 6 ++
accel/tcg/tcg-runtime-gvec.c | 26 ++++++
target/arm/tcg/translate.c | 56 ++----------
tcg/tcg-op-gvec.c | 150 +++++++++++++++++++++++++++++++
5 files changed, 216 insertions(+), 47 deletions(-)
--
2.34.1
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 1/2] tcg: Add gvec compare with immediate and scalar operand
2023-08-31 3:09 [PATCH 0/2] tcg: Add gvec compare with immediate and scalar operand Richard Henderson
@ 2023-08-31 3:09 ` Richard Henderson
2023-09-07 7:39 ` gaosong
2023-08-31 3:09 ` [PATCH 2/2] target/arm: Use tcg_gen_gvec_cmpi for compare vs 0 Richard Henderson
1 sibling, 1 reply; 7+ messages in thread
From: Richard Henderson @ 2023-08-31 3:09 UTC (permalink / raw)
To: qemu-devel; +Cc: gaosong
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/tcg-runtime.h | 25 ++++++
include/tcg/tcg-op-gvec-common.h | 6 ++
accel/tcg/tcg-runtime-gvec.c | 26 ++++++
tcg/tcg-op-gvec.c | 150 +++++++++++++++++++++++++++++++
4 files changed, 207 insertions(+)
diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
index 186899a2c7..c23b5e66c4 100644
--- a/accel/tcg/tcg-runtime.h
+++ b/accel/tcg/tcg-runtime.h
@@ -297,4 +297,29 @@ DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_eqs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_lts8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_lts64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_les8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_les64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_ltus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_ltus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
+DEF_HELPER_FLAGS_4(gvec_leus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
+
DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
diff --git a/include/tcg/tcg-op-gvec-common.h b/include/tcg/tcg-op-gvec-common.h
index e2683d487f..4db8a58c14 100644
--- a/include/tcg/tcg-op-gvec-common.h
+++ b/include/tcg/tcg-op-gvec-common.h
@@ -374,6 +374,12 @@ void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
uint32_t aofs, uint32_t bofs,
uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
+ uint32_t aofs, int64_t c,
+ uint32_t oprsz, uint32_t maxsz);
+void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
+ uint32_t aofs, TCGv_i64 c,
+ uint32_t oprsz, uint32_t maxsz);
/*
* Perform vector bit select: d = (b & a) | (c & ~a).
diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
index 6c99f952ca..afca89baa1 100644
--- a/accel/tcg/tcg-runtime-gvec.c
+++ b/accel/tcg/tcg-runtime-gvec.c
@@ -1042,6 +1042,32 @@ DO_CMP2(64)
#undef DO_CMP1
#undef DO_CMP2
+#define DO_CMP1(NAME, TYPE, OP) \
+void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc) \
+{ \
+ intptr_t oprsz = simd_oprsz(desc); \
+ TYPE inv = simd_data(desc), b = b64; \
+ for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) { \
+ *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv); \
+ } \
+ clear_high(d, oprsz, desc); \
+}
+
+#define DO_CMP2(SZ) \
+ DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \
+ DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \
+ DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \
+ DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \
+ DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)
+
+DO_CMP2(8)
+DO_CMP2(16)
+DO_CMP2(32)
+DO_CMP2(64)
+
+#undef DO_CMP1
+#undef DO_CMP2
+
void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
{
intptr_t oprsz = simd_oprsz(desc);
diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
index f5cfd9bf99..f7ca9e1051 100644
--- a/tcg/tcg-op-gvec.c
+++ b/tcg/tcg-op-gvec.c
@@ -3819,6 +3819,156 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
}
}
+void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
+ uint32_t aofs, TCGv_i64 c,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
+ static gen_helper_gvec_2i * const eq_fn[4] = {
+ gen_helper_gvec_eqs8, gen_helper_gvec_eqs16,
+ gen_helper_gvec_eqs32, gen_helper_gvec_eqs64
+ };
+ static gen_helper_gvec_2i * const lt_fn[4] = {
+ gen_helper_gvec_lts8, gen_helper_gvec_lts16,
+ gen_helper_gvec_lts32, gen_helper_gvec_lts64
+ };
+ static gen_helper_gvec_2i * const le_fn[4] = {
+ gen_helper_gvec_les8, gen_helper_gvec_les16,
+ gen_helper_gvec_les32, gen_helper_gvec_les64
+ };
+ static gen_helper_gvec_2i * const ltu_fn[4] = {
+ gen_helper_gvec_ltus8, gen_helper_gvec_ltus16,
+ gen_helper_gvec_ltus32, gen_helper_gvec_ltus64
+ };
+ static gen_helper_gvec_2i * const leu_fn[4] = {
+ gen_helper_gvec_leus8, gen_helper_gvec_leus16,
+ gen_helper_gvec_leus32, gen_helper_gvec_leus64
+ };
+ static gen_helper_gvec_2i * const * const fns[16] = {
+ [TCG_COND_EQ] = eq_fn,
+ [TCG_COND_LT] = lt_fn,
+ [TCG_COND_LE] = le_fn,
+ [TCG_COND_LTU] = ltu_fn,
+ [TCG_COND_LEU] = leu_fn,
+ };
+
+ TCGType type;
+
+ check_size_align(oprsz, maxsz, dofs | aofs);
+ check_overlap_2(dofs, aofs, maxsz);
+
+ if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
+ do_dup(MO_8, dofs, oprsz, maxsz,
+ NULL, NULL, -(cond == TCG_COND_ALWAYS));
+ return;
+ }
+
+ /*
+ * Implement inline with a vector type, if possible.
+ * Prefer integer when 64-bit host and 64-bit comparison.
+ */
+ type = choose_vector_type(cmp_list, vece, oprsz,
+ TCG_TARGET_REG_BITS == 64 && vece == MO_64);
+ if (type != 0) {
+ const TCGOpcode *hold_list = tcg_swap_vecop_list(cmp_list);
+ TCGv_vec t_vec = tcg_temp_new_vec(type);
+ uint32_t some, i;
+
+ tcg_gen_dup_i64_vec(vece, t_vec, c);
+
+ switch (type) {
+ case TCG_TYPE_V256:
+ some = QEMU_ALIGN_DOWN(oprsz, 32);
+ for (i = 0; i < some; i += 32) {
+ TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V256);
+ TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V256);
+ tcg_gen_ld_vec(t0, cpu_env, aofs);
+ tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
+ tcg_gen_st_vec(t0, cpu_env, dofs);
+ aofs += 32;
+ dofs += 32;
+ }
+ oprsz -= some;
+ maxsz -= some;
+ /* fallthru */
+
+ case TCG_TYPE_V128:
+ some = QEMU_ALIGN_DOWN(oprsz, 16);
+ for (i = 0; i < some; i += 16) {
+ TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V128);
+ TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V128);
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
+ tcg_gen_st_vec(t0, cpu_env, dofs + i);
+ }
+ break;
+
+ case TCG_TYPE_V64:
+ some = QEMU_ALIGN_DOWN(oprsz, 8);
+ for (i = 0; i < some; i += 8) {
+ TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V64);
+ TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V64);
+ tcg_gen_ld_vec(t0, cpu_env, aofs + i);
+ tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
+ tcg_gen_st_vec(t0, cpu_env, dofs + i);
+ }
+ break;
+
+ default:
+ g_assert_not_reached();
+ }
+ tcg_temp_free_vec(t_vec);
+ tcg_swap_vecop_list(hold_list);
+ } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
+ TCGv_i64 t0 = tcg_temp_ebb_new_i64();
+ uint32_t i;
+
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i64(t0, cpu_env, aofs + i);
+ tcg_gen_negsetcond_i64(cond, t0, t0, c);
+ tcg_gen_st_i64(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i64(t0);
+ } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
+ TCGv_i32 t0 = tcg_temp_ebb_new_i32();
+ TCGv_i32 t1 = tcg_temp_ebb_new_i32();
+ uint32_t i;
+
+ tcg_gen_extrl_i64_i32(t1, c);
+ for (i = 0; i < oprsz; i += 8) {
+ tcg_gen_ld_i32(t0, cpu_env, aofs + i);
+ tcg_gen_negsetcond_i32(cond, t0, t0, t1);
+ tcg_gen_st_i32(t0, cpu_env, dofs + i);
+ }
+ tcg_temp_free_i32(t0);
+ tcg_temp_free_i32(t1);
+ } else {
+ gen_helper_gvec_2i * const *fn = fns[cond];
+ bool inv = false;
+
+ if (fn == NULL) {
+ cond = tcg_invert_cond(cond);
+ fn = fns[cond];
+ assert(fn != NULL);
+ inv = true;
+ }
+ tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, inv, fn[vece]);
+ return;
+ }
+
+ if (oprsz < maxsz) {
+ expand_clr(dofs + oprsz, maxsz - oprsz);
+ }
+}
+
+void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
+ uint32_t aofs, int64_t c,
+ uint32_t oprsz, uint32_t maxsz)
+{
+ TCGv_i64 tmp = tcg_constant_i64(c);
+ tcg_gen_gvec_cmps(cond, vece, dofs, aofs, tmp, oprsz, maxsz);
+}
+
static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
{
TCGv_i64 t = tcg_temp_ebb_new_i64();
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 2/2] target/arm: Use tcg_gen_gvec_cmpi for compare vs 0
2023-08-31 3:09 [PATCH 0/2] tcg: Add gvec compare with immediate and scalar operand Richard Henderson
2023-08-31 3:09 ` [PATCH 1/2] " Richard Henderson
@ 2023-08-31 3:09 ` Richard Henderson
2023-09-07 12:13 ` gaosong
1 sibling, 1 reply; 7+ messages in thread
From: Richard Henderson @ 2023-08-31 3:09 UTC (permalink / raw)
To: qemu-devel; +Cc: gaosong
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/translate.c | 56 ++++++--------------------------------
1 file changed, 9 insertions(+), 47 deletions(-)
diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
index 38ad8dd4bd..89a7392ed3 100644
--- a/target/arm/tcg/translate.c
+++ b/target/arm/tcg/translate.c
@@ -2943,54 +2943,16 @@ void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
}
-#define GEN_CMP0(NAME, COND) \
- static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
- { \
- tcg_gen_negsetcond_i32(COND, d, a, tcg_constant_i32(0)); \
- } \
- static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
- { \
- tcg_gen_negsetcond_i64(COND, d, a, tcg_constant_i64(0)); \
- } \
- static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
- { \
- TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0); \
- tcg_gen_cmp_vec(COND, vece, d, a, zero); \
- } \
- void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
- uint32_t opr_sz, uint32_t max_sz) \
- { \
- const GVecGen2 op[4] = { \
- { .fno = gen_helper_gvec_##NAME##0_b, \
- .fniv = gen_##NAME##0_vec, \
- .opt_opc = vecop_list_cmp, \
- .vece = MO_8 }, \
- { .fno = gen_helper_gvec_##NAME##0_h, \
- .fniv = gen_##NAME##0_vec, \
- .opt_opc = vecop_list_cmp, \
- .vece = MO_16 }, \
- { .fni4 = gen_##NAME##0_i32, \
- .fniv = gen_##NAME##0_vec, \
- .opt_opc = vecop_list_cmp, \
- .vece = MO_32 }, \
- { .fni8 = gen_##NAME##0_i64, \
- .fniv = gen_##NAME##0_vec, \
- .opt_opc = vecop_list_cmp, \
- .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
- .vece = MO_64 }, \
- }; \
- tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
- }
+#define GEN_CMP0(NAME, COND) \
+ void NAME(unsigned vece, uint32_t d, uint32_t m, \
+ uint32_t opr_sz, uint32_t max_sz) \
+ { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
-static const TCGOpcode vecop_list_cmp[] = {
- INDEX_op_cmp_vec, 0
-};
-
-GEN_CMP0(ceq, TCG_COND_EQ)
-GEN_CMP0(cle, TCG_COND_LE)
-GEN_CMP0(cge, TCG_COND_GE)
-GEN_CMP0(clt, TCG_COND_LT)
-GEN_CMP0(cgt, TCG_COND_GT)
+GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
+GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
+GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
+GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
+GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
#undef GEN_CMP0
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] tcg: Add gvec compare with immediate and scalar operand
2023-08-31 3:09 ` [PATCH 1/2] " Richard Henderson
@ 2023-09-07 7:39 ` gaosong
2023-09-07 12:12 ` gaosong
0 siblings, 1 reply; 7+ messages in thread
From: gaosong @ 2023-09-07 7:39 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
Hi, Richard
在 2023/8/31 上午11:09, Richard Henderson 写道:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> accel/tcg/tcg-runtime.h | 25 ++++++
> include/tcg/tcg-op-gvec-common.h | 6 ++
> accel/tcg/tcg-runtime-gvec.c | 26 ++++++
> tcg/tcg-op-gvec.c | 150 +++++++++++++++++++++++++++++++
> 4 files changed, 207 insertions(+)
>
I use tcg_gen_gvec_cmps for the LoongArch vector cmp instructions, but I
got an Aborted error from temp_load(). I'll fix this later.
I'll also send the LASX V5 series; that series will not use tcg_gen_gvec_cmps.
Thanks.
Song Gao
> diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
> index 186899a2c7..c23b5e66c4 100644
> --- a/accel/tcg/tcg-runtime.h
> +++ b/accel/tcg/tcg-runtime.h
> @@ -297,4 +297,29 @@ DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
> DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
> DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
>
> +DEF_HELPER_FLAGS_4(gvec_eqs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_eqs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_eqs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_eqs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +
> +DEF_HELPER_FLAGS_4(gvec_lts8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_lts16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_lts32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_lts64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +
> +DEF_HELPER_FLAGS_4(gvec_les8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_les16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_les32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_les64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +
> +DEF_HELPER_FLAGS_4(gvec_ltus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_ltus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_ltus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_ltus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +
> +DEF_HELPER_FLAGS_4(gvec_leus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_leus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
> +
> DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, ptr, i32)
> diff --git a/include/tcg/tcg-op-gvec-common.h b/include/tcg/tcg-op-gvec-common.h
> index e2683d487f..4db8a58c14 100644
> --- a/include/tcg/tcg-op-gvec-common.h
> +++ b/include/tcg/tcg-op-gvec-common.h
> @@ -374,6 +374,12 @@ void tcg_gen_gvec_rotrv(unsigned vece, uint32_t dofs, uint32_t aofs,
> void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
> uint32_t aofs, uint32_t bofs,
> uint32_t oprsz, uint32_t maxsz);
> +void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
> + uint32_t aofs, int64_t c,
> + uint32_t oprsz, uint32_t maxsz);
> +void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
> + uint32_t aofs, TCGv_i64 c,
> + uint32_t oprsz, uint32_t maxsz);
>
> /*
> * Perform vector bit select: d = (b & a) | (c & ~a).
> diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
> index 6c99f952ca..afca89baa1 100644
> --- a/accel/tcg/tcg-runtime-gvec.c
> +++ b/accel/tcg/tcg-runtime-gvec.c
> @@ -1042,6 +1042,32 @@ DO_CMP2(64)
> #undef DO_CMP1
> #undef DO_CMP2
>
> +#define DO_CMP1(NAME, TYPE, OP) \
> +void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t desc) \
> +{ \
> + intptr_t oprsz = simd_oprsz(desc); \
> + TYPE inv = simd_data(desc), b = b64; \
> + for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE)) { \
> + *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^ inv); \
> + } \
> + clear_high(d, oprsz, desc); \
> +}
> +
> +#define DO_CMP2(SZ) \
> + DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \
> + DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \
> + DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \
> + DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \
> + DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)
> +
> +DO_CMP2(8)
> +DO_CMP2(16)
> +DO_CMP2(32)
> +DO_CMP2(64)
> +
> +#undef DO_CMP1
> +#undef DO_CMP2
> +
> void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
> {
> intptr_t oprsz = simd_oprsz(desc);
> diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
> index f5cfd9bf99..f7ca9e1051 100644
> --- a/tcg/tcg-op-gvec.c
> +++ b/tcg/tcg-op-gvec.c
> @@ -3819,6 +3819,156 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
> }
> }
>
> +void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
> + uint32_t aofs, TCGv_i64 c,
> + uint32_t oprsz, uint32_t maxsz)
> +{
> + static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
> + static gen_helper_gvec_2i * const eq_fn[4] = {
> + gen_helper_gvec_eqs8, gen_helper_gvec_eqs16,
> + gen_helper_gvec_eqs32, gen_helper_gvec_eqs64
> + };
> + static gen_helper_gvec_2i * const lt_fn[4] = {
> + gen_helper_gvec_lts8, gen_helper_gvec_lts16,
> + gen_helper_gvec_lts32, gen_helper_gvec_lts64
> + };
> + static gen_helper_gvec_2i * const le_fn[4] = {
> + gen_helper_gvec_les8, gen_helper_gvec_les16,
> + gen_helper_gvec_les32, gen_helper_gvec_les64
> + };
> + static gen_helper_gvec_2i * const ltu_fn[4] = {
> + gen_helper_gvec_ltus8, gen_helper_gvec_ltus16,
> + gen_helper_gvec_ltus32, gen_helper_gvec_ltus64
> + };
> + static gen_helper_gvec_2i * const leu_fn[4] = {
> + gen_helper_gvec_leus8, gen_helper_gvec_leus16,
> + gen_helper_gvec_leus32, gen_helper_gvec_leus64
> + };
> + static gen_helper_gvec_2i * const * const fns[16] = {
> + [TCG_COND_EQ] = eq_fn,
> + [TCG_COND_LT] = lt_fn,
> + [TCG_COND_LE] = le_fn,
> + [TCG_COND_LTU] = ltu_fn,
> + [TCG_COND_LEU] = leu_fn,
> + };
> +
> + TCGType type;
> +
> + check_size_align(oprsz, maxsz, dofs | aofs);
> + check_overlap_2(dofs, aofs, maxsz);
> +
> + if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
> + do_dup(MO_8, dofs, oprsz, maxsz,
> + NULL, NULL, -(cond == TCG_COND_ALWAYS));
> + return;
> + }
> +
> + /*
> + * Implement inline with a vector type, if possible.
> + * Prefer integer when 64-bit host and 64-bit comparison.
> + */
> + type = choose_vector_type(cmp_list, vece, oprsz,
> + TCG_TARGET_REG_BITS == 64 && vece == MO_64);
> + if (type != 0) {
> + const TCGOpcode *hold_list = tcg_swap_vecop_list(cmp_list);
> + TCGv_vec t_vec = tcg_temp_new_vec(type);
> + uint32_t some, i;
> +
> + tcg_gen_dup_i64_vec(vece, t_vec, c);
> +
> + switch (type) {
> + case TCG_TYPE_V256:
> + some = QEMU_ALIGN_DOWN(oprsz, 32);
> + for (i = 0; i < some; i += 32) {
> + TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V256);
> + TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V256);
> + tcg_gen_ld_vec(t0, cpu_env, aofs);
> + tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
> + tcg_gen_st_vec(t0, cpu_env, dofs);
> + aofs += 32;
> + dofs += 32;
> + }
> + oprsz -= some;
> + maxsz -= some;
> + /* fallthru */
> +
> + case TCG_TYPE_V128:
> + some = QEMU_ALIGN_DOWN(oprsz, 16);
> + for (i = 0; i < some; i += 16) {
> + TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V128);
> + TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V128);
> + tcg_gen_ld_vec(t0, cpu_env, aofs + i);
> + tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
> + tcg_gen_st_vec(t0, cpu_env, dofs + i);
> + }
> + break;
> +
> + case TCG_TYPE_V64:
> + some = QEMU_ALIGN_DOWN(oprsz, 8);
> + for (i = 0; i < some; i += 8) {
> + TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V64);
> + TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V64);
> + tcg_gen_ld_vec(t0, cpu_env, aofs + i);
> + tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
> + tcg_gen_st_vec(t0, cpu_env, dofs + i);
> + }
> + break;
> +
> + default:
> + g_assert_not_reached();
> + }
> + tcg_temp_free_vec(t_vec);
> + tcg_swap_vecop_list(hold_list);
> + } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
> + TCGv_i64 t0 = tcg_temp_ebb_new_i64();
> + uint32_t i;
> +
> + for (i = 0; i < oprsz; i += 8) {
> + tcg_gen_ld_i64(t0, cpu_env, aofs + i);
> + tcg_gen_negsetcond_i64(cond, t0, t0, c);
> + tcg_gen_st_i64(t0, cpu_env, dofs + i);
> + }
> + tcg_temp_free_i64(t0);
> + } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
> + TCGv_i32 t0 = tcg_temp_ebb_new_i32();
> + TCGv_i32 t1 = tcg_temp_ebb_new_i32();
> + uint32_t i;
> +
> + tcg_gen_extrl_i64_i32(t1, c);
> + for (i = 0; i < oprsz; i += 8) {
> + tcg_gen_ld_i32(t0, cpu_env, aofs + i);
> + tcg_gen_negsetcond_i32(cond, t0, t0, t1);
> + tcg_gen_st_i32(t0, cpu_env, dofs + i);
> + }
> + tcg_temp_free_i32(t0);
> + tcg_temp_free_i32(t1);
> + } else {
> + gen_helper_gvec_2i * const *fn = fns[cond];
> + bool inv = false;
> +
> + if (fn == NULL) {
> + cond = tcg_invert_cond(cond);
> + fn = fns[cond];
> + assert(fn != NULL);
> + inv = true;
> + }
> + tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, inv, fn[vece]);
> + return;
> + }
> +
> + if (oprsz < maxsz) {
> + expand_clr(dofs + oprsz, maxsz - oprsz);
> + }
> +}
> +
> +void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
> + uint32_t aofs, int64_t c,
> + uint32_t oprsz, uint32_t maxsz)
> +{
> + TCGv_i64 tmp = tcg_constant_i64(c);
> + tcg_gen_gvec_cmps(cond, vece, dofs, aofs, tmp, oprsz, maxsz);
> +}
> +
> static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b, TCGv_i64 c)
> {
> TCGv_i64 t = tcg_temp_ebb_new_i64();
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] tcg: Add gvec compare with immediate and scalar operand
2023-09-07 7:39 ` gaosong
@ 2023-09-07 12:12 ` gaosong
2023-09-11 12:38 ` gaosong
0 siblings, 1 reply; 7+ messages in thread
From: gaosong @ 2023-09-07 12:12 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
在 2023/9/7 下午3:39, gaosong 写道:
> Hi, Richard
> 在 2023/8/31 上午11:09, Richard Henderson 写道:
>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>> ---
>> accel/tcg/tcg-runtime.h | 25 ++++++
>> include/tcg/tcg-op-gvec-common.h | 6 ++
>> accel/tcg/tcg-runtime-gvec.c | 26 ++++++
>> tcg/tcg-op-gvec.c | 150 +++++++++++++++++++++++++++++++
>> 4 files changed, 207 insertions(+)
>>
>
> I use tcg_gen_gvec_cmps for LoongArch vector cmp instructions. but I
> got an Aborted error from temp_load(). I'll fixes this later.
>
My mistake — it works well.
I will use tcg_gen_gvec_cmps in LoongArch's LASX series.
For this patch:
Tested-by: Song Gao <gaosong@loongson.cn>
Reviewed-by: Song Gao <gaosong@loongson.cn>
Thanks.
Song Gao
> And I'll send LASX V5 series. this series will not use tcg_gen_gvec_cmps.
>
> Thanks.
> Song Gao
>
>> diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
>> index 186899a2c7..c23b5e66c4 100644
>> --- a/accel/tcg/tcg-runtime.h
>> +++ b/accel/tcg/tcg-runtime.h
>> @@ -297,4 +297,29 @@ DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG,
>> void, ptr, ptr, ptr, i32)
>> DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr, ptr,
>> i32)
>> DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr, ptr,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_eqs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
>> +DEF_HELPER_FLAGS_4(gvec_eqs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_eqs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_eqs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +
>> +DEF_HELPER_FLAGS_4(gvec_lts8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
>> +DEF_HELPER_FLAGS_4(gvec_lts16, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_lts32, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_lts64, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +
>> +DEF_HELPER_FLAGS_4(gvec_les8, TCG_CALL_NO_RWG, void, ptr, ptr, i64, i32)
>> +DEF_HELPER_FLAGS_4(gvec_les16, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_les32, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_les64, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +
>> +DEF_HELPER_FLAGS_4(gvec_ltus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_ltus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_ltus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_ltus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +
>> +DEF_HELPER_FLAGS_4(gvec_leus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_leus16, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>> i32)
>> +
>> DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr,
>> ptr, ptr, i32)
>> diff --git a/include/tcg/tcg-op-gvec-common.h
>> b/include/tcg/tcg-op-gvec-common.h
>> index e2683d487f..4db8a58c14 100644
>> --- a/include/tcg/tcg-op-gvec-common.h
>> +++ b/include/tcg/tcg-op-gvec-common.h
>> @@ -374,6 +374,12 @@ void tcg_gen_gvec_rotrv(unsigned vece, uint32_t
>> dofs, uint32_t aofs,
>> void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
>> uint32_t aofs, uint32_t bofs,
>> uint32_t oprsz, uint32_t maxsz);
>> +void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
>> + uint32_t aofs, int64_t c,
>> + uint32_t oprsz, uint32_t maxsz);
>> +void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
>> + uint32_t aofs, TCGv_i64 c,
>> + uint32_t oprsz, uint32_t maxsz);
>> /*
>> * Perform vector bit select: d = (b & a) | (c & ~a).
>> diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
>> index 6c99f952ca..afca89baa1 100644
>> --- a/accel/tcg/tcg-runtime-gvec.c
>> +++ b/accel/tcg/tcg-runtime-gvec.c
>> @@ -1042,6 +1042,32 @@ DO_CMP2(64)
>> #undef DO_CMP1
>> #undef DO_CMP2
>> +#define DO_CMP1(NAME, TYPE,
>> OP) \
>> +void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t
>> desc) \
>> +{
>> \
>> + intptr_t oprsz =
>> simd_oprsz(desc); \
>> + TYPE inv = simd_data(desc), b =
>> b64; \
>> + for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE))
>> { \
>> + *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^
>> inv); \
>> +
>> } \
>> + clear_high(d, oprsz,
>> desc); \
>> +}
>> +
>> +#define DO_CMP2(SZ) \
>> + DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \
>> + DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \
>> + DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \
>> + DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \
>> + DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)
>> +
>> +DO_CMP2(8)
>> +DO_CMP2(16)
>> +DO_CMP2(32)
>> +DO_CMP2(64)
>> +
>> +#undef DO_CMP1
>> +#undef DO_CMP2
>> +
>> void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
>> {
>> intptr_t oprsz = simd_oprsz(desc);
>> diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
>> index f5cfd9bf99..f7ca9e1051 100644
>> --- a/tcg/tcg-op-gvec.c
>> +++ b/tcg/tcg-op-gvec.c
>> @@ -3819,6 +3819,156 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned
>> vece, uint32_t dofs,
>> }
>> }
>> +void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
>> + uint32_t aofs, TCGv_i64 c,
>> + uint32_t oprsz, uint32_t maxsz)
>> +{
>> + static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
>> + static gen_helper_gvec_2i * const eq_fn[4] = {
>> + gen_helper_gvec_eqs8, gen_helper_gvec_eqs16,
>> + gen_helper_gvec_eqs32, gen_helper_gvec_eqs64
>> + };
>> + static gen_helper_gvec_2i * const lt_fn[4] = {
>> + gen_helper_gvec_lts8, gen_helper_gvec_lts16,
>> + gen_helper_gvec_lts32, gen_helper_gvec_lts64
>> + };
>> + static gen_helper_gvec_2i * const le_fn[4] = {
>> + gen_helper_gvec_les8, gen_helper_gvec_les16,
>> + gen_helper_gvec_les32, gen_helper_gvec_les64
>> + };
>> + static gen_helper_gvec_2i * const ltu_fn[4] = {
>> + gen_helper_gvec_ltus8, gen_helper_gvec_ltus16,
>> + gen_helper_gvec_ltus32, gen_helper_gvec_ltus64
>> + };
>> + static gen_helper_gvec_2i * const leu_fn[4] = {
>> + gen_helper_gvec_leus8, gen_helper_gvec_leus16,
>> + gen_helper_gvec_leus32, gen_helper_gvec_leus64
>> + };
>> + static gen_helper_gvec_2i * const * const fns[16] = {
>> + [TCG_COND_EQ] = eq_fn,
>> + [TCG_COND_LT] = lt_fn,
>> + [TCG_COND_LE] = le_fn,
>> + [TCG_COND_LTU] = ltu_fn,
>> + [TCG_COND_LEU] = leu_fn,
>> + };
>> +
>> + TCGType type;
>> +
>> + check_size_align(oprsz, maxsz, dofs | aofs);
>> + check_overlap_2(dofs, aofs, maxsz);
>> +
>> + if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
>> + do_dup(MO_8, dofs, oprsz, maxsz,
>> + NULL, NULL, -(cond == TCG_COND_ALWAYS));
>> + return;
>> + }
>> +
>> + /*
>> + * Implement inline with a vector type, if possible.
>> + * Prefer integer when 64-bit host and 64-bit comparison.
>> + */
>> + type = choose_vector_type(cmp_list, vece, oprsz,
>> + TCG_TARGET_REG_BITS == 64 && vece ==
>> MO_64);
>> + if (type != 0) {
>> + const TCGOpcode *hold_list = tcg_swap_vecop_list(cmp_list);
>> + TCGv_vec t_vec = tcg_temp_new_vec(type);
>> + uint32_t some, i;
>> +
>> + tcg_gen_dup_i64_vec(vece, t_vec, c);
>> +
>> + switch (type) {
>> + case TCG_TYPE_V256:
>> + some = QEMU_ALIGN_DOWN(oprsz, 32);
>> + for (i = 0; i < some; i += 32) {
>> + TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V256);
>> + TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V256);
>> + tcg_gen_ld_vec(t0, cpu_env, aofs);
>> + tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
>> + tcg_gen_st_vec(t0, cpu_env, dofs);
>> + aofs += 32;
>> + dofs += 32;
>> + }
>> + oprsz -= some;
>> + maxsz -= some;
>> + /* fallthru */
>> +
>> + case TCG_TYPE_V128:
>> + some = QEMU_ALIGN_DOWN(oprsz, 16);
>> + for (i = 0; i < some; i += 16) {
>> + TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V128);
>> + TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V128);
>> + tcg_gen_ld_vec(t0, cpu_env, aofs + i);
>> + tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
>> + tcg_gen_st_vec(t0, cpu_env, dofs + i);
>> + }
>> + break;
>> +
>> + case TCG_TYPE_V64:
>> + some = QEMU_ALIGN_DOWN(oprsz, 8);
>> + for (i = 0; i < some; i += 8) {
>> + TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V64);
>> + TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V64);
>> + tcg_gen_ld_vec(t0, cpu_env, aofs + i);
>> + tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
>> + tcg_gen_st_vec(t0, cpu_env, dofs + i);
>> + }
>> + break;
>> +
>> + default:
>> + g_assert_not_reached();
>> + }
>> + tcg_temp_free_vec(t_vec);
>> + tcg_swap_vecop_list(hold_list);
>> + } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
>> + TCGv_i64 t0 = tcg_temp_ebb_new_i64();
>> + uint32_t i;
>> +
>> + for (i = 0; i < oprsz; i += 8) {
>> + tcg_gen_ld_i64(t0, cpu_env, aofs + i);
>> + tcg_gen_negsetcond_i64(cond, t0, t0, c);
>> + tcg_gen_st_i64(t0, cpu_env, dofs + i);
>> + }
>> + tcg_temp_free_i64(t0);
>> + } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
>> + TCGv_i32 t0 = tcg_temp_ebb_new_i32();
>> + TCGv_i32 t1 = tcg_temp_ebb_new_i32();
>> + uint32_t i;
>> +
>> + tcg_gen_extrl_i64_i32(t1, c);
>> + for (i = 0; i < oprsz; i += 8) {
>> + tcg_gen_ld_i32(t0, cpu_env, aofs + i);
>> + tcg_gen_negsetcond_i32(cond, t0, t0, t1);
>> + tcg_gen_st_i32(t0, cpu_env, dofs + i);
>> + }
>> + tcg_temp_free_i32(t0);
>> + tcg_temp_free_i32(t1);
>> + } else {
>> + gen_helper_gvec_2i * const *fn = fns[cond];
>> + bool inv = false;
>> +
>> + if (fn == NULL) {
>> + cond = tcg_invert_cond(cond);
>> + fn = fns[cond];
>> + assert(fn != NULL);
>> + inv = true;
>> + }
>> + tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, inv, fn[vece]);
>> + return;
>> + }
>> +
>> + if (oprsz < maxsz) {
>> + expand_clr(dofs + oprsz, maxsz - oprsz);
>> + }
>> +}
>> +
>> +void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
>> + uint32_t aofs, int64_t c,
>> + uint32_t oprsz, uint32_t maxsz)
>> +{
>> + TCGv_i64 tmp = tcg_constant_i64(c);
>> + tcg_gen_gvec_cmps(cond, vece, dofs, aofs, tmp, oprsz, maxsz);
>> +}
>> +
>> static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b,
>> TCGv_i64 c)
>> {
>> TCGv_i64 t = tcg_temp_ebb_new_i64();
>>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 2/2] target/arm: Use tcg_gen_gvec_cmpi for compare vs 0
2023-08-31 3:09 ` [PATCH 2/2] target/arm: Use tcg_gen_gvec_cmpi for compare vs 0 Richard Henderson
@ 2023-09-07 12:13 ` gaosong
0 siblings, 0 replies; 7+ messages in thread
From: gaosong @ 2023-09-07 12:13 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
在 2023/8/31 上午11:09, Richard Henderson 写道:
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> target/arm/tcg/translate.c | 56 ++++++--------------------------------
> 1 file changed, 9 insertions(+), 47 deletions(-)
>
> diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
> index 38ad8dd4bd..89a7392ed3 100644
> --- a/target/arm/tcg/translate.c
> +++ b/target/arm/tcg/translate.c
Reviewed-by: Song Gao <gaosong@loongson.cn>
Thanks.
Song Gao
> @@ -2943,54 +2943,16 @@ void gen_gvec_sqrdmlsh_qc(unsigned vece, uint32_t rd_ofs, uint32_t rn_ofs,
> gen_gvec_fn3_qc(rd_ofs, rn_ofs, rm_ofs, opr_sz, max_sz, fns[vece - 1]);
> }
>
> -#define GEN_CMP0(NAME, COND) \
> - static void gen_##NAME##0_i32(TCGv_i32 d, TCGv_i32 a) \
> - { \
> - tcg_gen_negsetcond_i32(COND, d, a, tcg_constant_i32(0)); \
> - } \
> - static void gen_##NAME##0_i64(TCGv_i64 d, TCGv_i64 a) \
> - { \
> - tcg_gen_negsetcond_i64(COND, d, a, tcg_constant_i64(0)); \
> - } \
> - static void gen_##NAME##0_vec(unsigned vece, TCGv_vec d, TCGv_vec a) \
> - { \
> - TCGv_vec zero = tcg_constant_vec_matching(d, vece, 0); \
> - tcg_gen_cmp_vec(COND, vece, d, a, zero); \
> - } \
> - void gen_gvec_##NAME##0(unsigned vece, uint32_t d, uint32_t m, \
> - uint32_t opr_sz, uint32_t max_sz) \
> - { \
> - const GVecGen2 op[4] = { \
> - { .fno = gen_helper_gvec_##NAME##0_b, \
> - .fniv = gen_##NAME##0_vec, \
> - .opt_opc = vecop_list_cmp, \
> - .vece = MO_8 }, \
> - { .fno = gen_helper_gvec_##NAME##0_h, \
> - .fniv = gen_##NAME##0_vec, \
> - .opt_opc = vecop_list_cmp, \
> - .vece = MO_16 }, \
> - { .fni4 = gen_##NAME##0_i32, \
> - .fniv = gen_##NAME##0_vec, \
> - .opt_opc = vecop_list_cmp, \
> - .vece = MO_32 }, \
> - { .fni8 = gen_##NAME##0_i64, \
> - .fniv = gen_##NAME##0_vec, \
> - .opt_opc = vecop_list_cmp, \
> - .prefer_i64 = TCG_TARGET_REG_BITS == 64, \
> - .vece = MO_64 }, \
> - }; \
> - tcg_gen_gvec_2(d, m, opr_sz, max_sz, &op[vece]); \
> - }
> +#define GEN_CMP0(NAME, COND) \
> + void NAME(unsigned vece, uint32_t d, uint32_t m, \
> + uint32_t opr_sz, uint32_t max_sz) \
> + { tcg_gen_gvec_cmpi(COND, vece, d, m, 0, opr_sz, max_sz); }
>
> -static const TCGOpcode vecop_list_cmp[] = {
> - INDEX_op_cmp_vec, 0
> -};
> -
> -GEN_CMP0(ceq, TCG_COND_EQ)
> -GEN_CMP0(cle, TCG_COND_LE)
> -GEN_CMP0(cge, TCG_COND_GE)
> -GEN_CMP0(clt, TCG_COND_LT)
> -GEN_CMP0(cgt, TCG_COND_GT)
> +GEN_CMP0(gen_gvec_ceq0, TCG_COND_EQ)
> +GEN_CMP0(gen_gvec_cle0, TCG_COND_LE)
> +GEN_CMP0(gen_gvec_cge0, TCG_COND_GE)
> +GEN_CMP0(gen_gvec_clt0, TCG_COND_LT)
> +GEN_CMP0(gen_gvec_cgt0, TCG_COND_GT)
>
> #undef GEN_CMP0
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/2] tcg: Add gvec compare with immediate and scalar operand
2023-09-07 12:12 ` gaosong
@ 2023-09-11 12:38 ` gaosong
0 siblings, 0 replies; 7+ messages in thread
From: gaosong @ 2023-09-11 12:38 UTC (permalink / raw)
To: Richard Henderson, qemu-devel
在 2023/9/7 下午8:12, gaosong 写道:
> 在 2023/9/7 下午3:39, gaosong 写道:
>> Hi, Richard
>> 在 2023/8/31 上午11:09, Richard Henderson 写道:
>>> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
>>> ---
>>> accel/tcg/tcg-runtime.h | 25 ++++++
>>> include/tcg/tcg-op-gvec-common.h | 6 ++
>>> accel/tcg/tcg-runtime-gvec.c | 26 ++++++
>>> tcg/tcg-op-gvec.c | 150 +++++++++++++++++++++++++++++++
>>> 4 files changed, 207 insertions(+)
>>>
>>
>> I use tcg_gen_gvec_cmps for LoongArch vector cmp instructions. but I
>> got an Aborted error from temp_load(). I'll fix this later.
>>
> My mistake, it works well.
> I will use tcg_gen_gvec_cmps on LoongArch's LASX series.
>
> For this patch:
> Tested-by: Song Gao <gaosong@loongson.cn>
> Reviewed-by: Song Gao <gaosong@loongson.cn>
>
> Thanks.
> Song Gao
>
>> And I'll send the LASX V5 series. This series will not use tcg_gen_gvec_cmps.
>>
>> Thanks.
>> Song Gao
>>
Oh, my testing was not enough; I got a temp_load Aborted again.
So I looked at this patch more carefully, and found a typo.
>>> diff --git a/accel/tcg/tcg-runtime.h b/accel/tcg/tcg-runtime.h
>>> index 186899a2c7..c23b5e66c4 100644
>>> --- a/accel/tcg/tcg-runtime.h
>>> +++ b/accel/tcg/tcg-runtime.h
>>> @@ -297,4 +297,29 @@ DEF_HELPER_FLAGS_4(gvec_leu16, TCG_CALL_NO_RWG,
>>> void, ptr, ptr, ptr, i32)
>>> DEF_HELPER_FLAGS_4(gvec_leu32, TCG_CALL_NO_RWG, void, ptr, ptr,
>>> ptr, i32)
>>> DEF_HELPER_FLAGS_4(gvec_leu64, TCG_CALL_NO_RWG, void, ptr, ptr,
>>> ptr, i32)
>>> +DEF_HELPER_FLAGS_4(gvec_eqs8, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +DEF_HELPER_FLAGS_4(gvec_eqs16, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +DEF_HELPER_FLAGS_4(gvec_eqs32, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +DEF_HELPER_FLAGS_4(gvec_eqs64, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +
>>> +DEF_HELPER_FLAGS_4(gvec_lts8, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +DEF_HELPER_FLAGS_4(gvec_lts16, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +DEF_HELPER_FLAGS_4(gvec_lts32, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +DEF_HELPER_FLAGS_4(gvec_lts64, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +
>>> +DEF_HELPER_FLAGS_4(gvec_les8, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +DEF_HELPER_FLAGS_4(gvec_les16, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +DEF_HELPER_FLAGS_4(gvec_les32, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +DEF_HELPER_FLAGS_4(gvec_les64, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +
>>> +DEF_HELPER_FLAGS_4(gvec_ltus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +DEF_HELPER_FLAGS_4(gvec_ltus16, TCG_CALL_NO_RWG, void, ptr, ptr,
>>> i64, i32)
>>> +DEF_HELPER_FLAGS_4(gvec_ltus32, TCG_CALL_NO_RWG, void, ptr, ptr,
>>> i64, i32)
>>> +DEF_HELPER_FLAGS_4(gvec_ltus64, TCG_CALL_NO_RWG, void, ptr, ptr,
>>> i64, i32)
>>> +
>>> +DEF_HELPER_FLAGS_4(gvec_leus8, TCG_CALL_NO_RWG, void, ptr, ptr, i64,
>>> i32)
>>> +DEF_HELPER_FLAGS_4(gvec_leus16, TCG_CALL_NO_RWG, void, ptr, ptr,
>>> i64, i32)
>>> +DEF_HELPER_FLAGS_4(gvec_leus32, TCG_CALL_NO_RWG, void, ptr, ptr,
>>> i64, i32)
>>> +DEF_HELPER_FLAGS_4(gvec_leus64, TCG_CALL_NO_RWG, void, ptr, ptr,
>>> i64, i32)
>>> +
>>> DEF_HELPER_FLAGS_5(gvec_bitsel, TCG_CALL_NO_RWG, void, ptr, ptr,
>>> ptr, ptr, i32)
>>> diff --git a/include/tcg/tcg-op-gvec-common.h
>>> b/include/tcg/tcg-op-gvec-common.h
>>> index e2683d487f..4db8a58c14 100644
>>> --- a/include/tcg/tcg-op-gvec-common.h
>>> +++ b/include/tcg/tcg-op-gvec-common.h
>>> @@ -374,6 +374,12 @@ void tcg_gen_gvec_rotrv(unsigned vece, uint32_t
>>> dofs, uint32_t aofs,
>>> void tcg_gen_gvec_cmp(TCGCond cond, unsigned vece, uint32_t dofs,
>>> uint32_t aofs, uint32_t bofs,
>>> uint32_t oprsz, uint32_t maxsz);
>>> +void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
>>> + uint32_t aofs, int64_t c,
>>> + uint32_t oprsz, uint32_t maxsz);
>>> +void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
>>> + uint32_t aofs, TCGv_i64 c,
>>> + uint32_t oprsz, uint32_t maxsz);
>>> /*
>>> * Perform vector bit select: d = (b & a) | (c & ~a).
>>> diff --git a/accel/tcg/tcg-runtime-gvec.c b/accel/tcg/tcg-runtime-gvec.c
>>> index 6c99f952ca..afca89baa1 100644
>>> --- a/accel/tcg/tcg-runtime-gvec.c
>>> +++ b/accel/tcg/tcg-runtime-gvec.c
>>> @@ -1042,6 +1042,32 @@ DO_CMP2(64)
>>> #undef DO_CMP1
>>> #undef DO_CMP2
>>> +#define DO_CMP1(NAME, TYPE,
>>> OP) \
>>> +void HELPER(NAME)(void *d, void *a, uint64_t b64, uint32_t
>>> desc) \
>>> +{ \
>>> + intptr_t oprsz =
>>> simd_oprsz(desc); \
>>> + TYPE inv = simd_data(desc), b =
>>> b64; \
>>> + for (intptr_t i = 0; i < oprsz; i += sizeof(TYPE))
>>> { \
>>> + *(TYPE *)(d + i) = -((*(TYPE *)(a + i) OP b) ^
>>> inv); \
>>> +
>>> } \
>>> + clear_high(d, oprsz,
>>> desc); \
>>> +}
>>> +
>>> +#define DO_CMP2(SZ) \
>>> + DO_CMP1(gvec_eqs##SZ, uint##SZ##_t, ==) \
>>> + DO_CMP1(gvec_lts##SZ, int##SZ##_t, <) \
>>> + DO_CMP1(gvec_les##SZ, int##SZ##_t, <=) \
>>> + DO_CMP1(gvec_ltus##SZ, uint##SZ##_t, <) \
>>> + DO_CMP1(gvec_leus##SZ, uint##SZ##_t, <=)
>>> +
>>> +DO_CMP2(8)
>>> +DO_CMP2(16)
>>> +DO_CMP2(32)
>>> +DO_CMP2(64)
>>> +
>>> +#undef DO_CMP1
>>> +#undef DO_CMP2
>>> +
>>> void HELPER(gvec_ssadd8)(void *d, void *a, void *b, uint32_t desc)
>>> {
>>> intptr_t oprsz = simd_oprsz(desc);
>>> diff --git a/tcg/tcg-op-gvec.c b/tcg/tcg-op-gvec.c
>>> index f5cfd9bf99..f7ca9e1051 100644
>>> --- a/tcg/tcg-op-gvec.c
>>> +++ b/tcg/tcg-op-gvec.c
>>> @@ -3819,6 +3819,156 @@ void tcg_gen_gvec_cmp(TCGCond cond, unsigned
>>> vece, uint32_t dofs,
>>> }
>>> }
>>> +void tcg_gen_gvec_cmps(TCGCond cond, unsigned vece, uint32_t dofs,
>>> + uint32_t aofs, TCGv_i64 c,
>>> + uint32_t oprsz, uint32_t maxsz)
>>> +{
>>> + static const TCGOpcode cmp_list[] = { INDEX_op_cmp_vec, 0 };
>>> + static gen_helper_gvec_2i * const eq_fn[4] = {
>>> + gen_helper_gvec_eqs8, gen_helper_gvec_eqs16,
>>> + gen_helper_gvec_eqs32, gen_helper_gvec_eqs64
>>> + };
>>> + static gen_helper_gvec_2i * const lt_fn[4] = {
>>> + gen_helper_gvec_lts8, gen_helper_gvec_lts16,
>>> + gen_helper_gvec_lts32, gen_helper_gvec_lts64
>>> + };
>>> + static gen_helper_gvec_2i * const le_fn[4] = {
>>> + gen_helper_gvec_les8, gen_helper_gvec_les16,
>>> + gen_helper_gvec_les32, gen_helper_gvec_les64
>>> + };
>>> + static gen_helper_gvec_2i * const ltu_fn[4] = {
>>> + gen_helper_gvec_ltus8, gen_helper_gvec_ltus16,
>>> + gen_helper_gvec_ltus32, gen_helper_gvec_ltus64
>>> + };
>>> + static gen_helper_gvec_2i * const leu_fn[4] = {
>>> + gen_helper_gvec_leus8, gen_helper_gvec_leus16,
>>> + gen_helper_gvec_leus32, gen_helper_gvec_leus64
>>> + };
>>> + static gen_helper_gvec_2i * const * const fns[16] = {
>>> + [TCG_COND_EQ] = eq_fn,
>>> + [TCG_COND_LT] = lt_fn,
>>> + [TCG_COND_LE] = le_fn,
>>> + [TCG_COND_LTU] = ltu_fn,
>>> + [TCG_COND_LEU] = leu_fn,
>>> + };
>>> +
>>> + TCGType type;
>>> +
>>> + check_size_align(oprsz, maxsz, dofs | aofs);
>>> + check_overlap_2(dofs, aofs, maxsz);
>>> +
>>> + if (cond == TCG_COND_NEVER || cond == TCG_COND_ALWAYS) {
>>> + do_dup(MO_8, dofs, oprsz, maxsz,
>>> + NULL, NULL, -(cond == TCG_COND_ALWAYS));
>>> + return;
>>> + }
>>> +
>>> + /*
>>> + * Implement inline with a vector type, if possible.
>>> + * Prefer integer when 64-bit host and 64-bit comparison.
>>> + */
>>> + type = choose_vector_type(cmp_list, vece, oprsz,
>>> + TCG_TARGET_REG_BITS == 64 && vece ==
>>> MO_64);
>>> + if (type != 0) {
>>> + const TCGOpcode *hold_list = tcg_swap_vecop_list(cmp_list);
>>> + TCGv_vec t_vec = tcg_temp_new_vec(type);
>>> + uint32_t some, i;
>>> +
>>> + tcg_gen_dup_i64_vec(vece, t_vec, c);
>>> +
>>> + switch (type) {
>>> + case TCG_TYPE_V256:
>>> + some = QEMU_ALIGN_DOWN(oprsz, 32);
>>> + for (i = 0; i < some; i += 32) {
>>> + TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V256);
>>> + TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V256);
>>> + tcg_gen_ld_vec(t0, cpu_env, aofs);
Typo: this should be t1.
>>> + tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
>>> + tcg_gen_st_vec(t0, cpu_env, dofs);
>>> + aofs += 32;
>>> + dofs += 32;
>>> + }
>>> + oprsz -= some;
>>> + maxsz -= some;
>>> + /* fallthru */
>>> +
>>> + case TCG_TYPE_V128:
>>> + some = QEMU_ALIGN_DOWN(oprsz, 16);
>>> + for (i = 0; i < some; i += 16) {
>>> + TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V128);
>>> + TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V128);
>>> + tcg_gen_ld_vec(t0, cpu_env, aofs + i);
Likewise.
>>> + tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
>>> + tcg_gen_st_vec(t0, cpu_env, dofs + i);
>>> + }
>>> + break;
>>> +
>>> + case TCG_TYPE_V64:
>>> + some = QEMU_ALIGN_DOWN(oprsz, 8);
>>> + for (i = 0; i < some; i += 8) {
>>> + TCGv_vec t0 = tcg_temp_new_vec(TCG_TYPE_V64);
>>> + TCGv_vec t1 = tcg_temp_new_vec(TCG_TYPE_V64);
>>> + tcg_gen_ld_vec(t0, cpu_env, aofs + i);
Likewise.
>>> + tcg_gen_cmp_vec(cond, vece, t0, t1, t_vec);
>>> + tcg_gen_st_vec(t0, cpu_env, dofs + i);
>>> + }
How about creating expand_cmpi_vec(), like expand_cmp_vec()?
Anyway, this patch is very useful.
Thanks.
Song Gao
>>> + break;
>>> +
>>> + default:
>>> + g_assert_not_reached();
>>> + }
>>> + tcg_temp_free_vec(t_vec);
>>> + tcg_swap_vecop_list(hold_list);
>>> + } else if (vece == MO_64 && check_size_impl(oprsz, 8)) {
>>> + TCGv_i64 t0 = tcg_temp_ebb_new_i64();
>>> + uint32_t i;
>>> +
>>> + for (i = 0; i < oprsz; i += 8) {
>>> + tcg_gen_ld_i64(t0, cpu_env, aofs + i);
>>> + tcg_gen_negsetcond_i64(cond, t0, t0, c);
>>> + tcg_gen_st_i64(t0, cpu_env, dofs + i);
>>> + }
>>> + tcg_temp_free_i64(t0);
>>> + } else if (vece == MO_32 && check_size_impl(oprsz, 4)) {
>>> + TCGv_i32 t0 = tcg_temp_ebb_new_i32();
>>> + TCGv_i32 t1 = tcg_temp_ebb_new_i32();
>>> + uint32_t i;
>>> +
>>> + tcg_gen_extrl_i64_i32(t1, c);
>>> + for (i = 0; i < oprsz; i += 8) {
>>> + tcg_gen_ld_i32(t0, cpu_env, aofs + i);
>>> + tcg_gen_negsetcond_i32(cond, t0, t0, t1);
>>> + tcg_gen_st_i32(t0, cpu_env, dofs + i);
>>> + }
>>> + tcg_temp_free_i32(t0);
>>> + tcg_temp_free_i32(t1);
>>> + } else {
>>> + gen_helper_gvec_2i * const *fn = fns[cond];
>>> + bool inv = false;
>>> +
>>> + if (fn == NULL) {
>>> + cond = tcg_invert_cond(cond);
>>> + fn = fns[cond];
>>> + assert(fn != NULL);
>>> + inv = true;
>>> + }
>>> + tcg_gen_gvec_2i_ool(dofs, aofs, c, oprsz, maxsz, inv,
>>> fn[vece]);
>>> + return;
>>> + }
>>> +
>>> + if (oprsz < maxsz) {
>>> + expand_clr(dofs + oprsz, maxsz - oprsz);
>>> + }
>>> +}
>>> +
>>> +void tcg_gen_gvec_cmpi(TCGCond cond, unsigned vece, uint32_t dofs,
>>> + uint32_t aofs, int64_t c,
>>> + uint32_t oprsz, uint32_t maxsz)
>>> +{
>>> + TCGv_i64 tmp = tcg_constant_i64(c);
>>> + tcg_gen_gvec_cmps(cond, vece, dofs, aofs, tmp, oprsz, maxsz);
>>> +}
>>> +
>>> static void tcg_gen_bitsel_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b,
>>> TCGv_i64 c)
>>> {
>>> TCGv_i64 t = tcg_temp_ebb_new_i64();
>>>
>>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2023-09-11 12:39 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-08-31 3:09 [PATCH 0/2] tcg: Add gvec compare with immediate and scalar operand Richard Henderson
2023-08-31 3:09 ` [PATCH 1/2] " Richard Henderson
2023-09-07 7:39 ` gaosong
2023-09-07 12:12 ` gaosong
2023-09-11 12:38 ` gaosong
2023-08-31 3:09 ` [PATCH 2/2] target/arm: Use tcg_gen_gvec_cmpi for compare vs 0 Richard Henderson
2023-09-07 12:13 ` gaosong
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).