From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: qemu-arm@nongnu.org
Subject: [PATCH v2 65/67] target/arm: Convert SQDMULH, SQRDMULH to decodetree
Date: Fri, 24 May 2024 16:21:19 -0700 [thread overview]
Message-ID: <20240524232121.284515-66-richard.henderson@linaro.org> (raw)
In-Reply-To: <20240524232121.284515-1-richard.henderson@linaro.org>
These are the last instructions within disas_simd_three_reg_same
and disas_simd_scalar_three_reg_same, so remove them.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper.h | 10 ++
target/arm/tcg/a64.decode | 18 +++
target/arm/tcg/translate-a64.c | 276 ++++++++++-----------------------
target/arm/tcg/vec_helper.c | 64 ++++++++
4 files changed, 172 insertions(+), 196 deletions(-)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index 85f9302563..24feecee9b 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -968,6 +968,16 @@ DEF_HELPER_FLAGS_5(neon_sqrdmulh_h, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(neon_sqrdmulh_s, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqdmulh_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(neon_sqrdmulh_idx_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_4(sve2_sqdmulh_b, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2_sqdmulh_h, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_4(sve2_sqdmulh_s, TCG_CALL_NO_RWG, void, ptr, ptr, ptr, i32)
diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode
index 2dea68a0a9..f7f897f9fc 100644
--- a/target/arm/tcg/a64.decode
+++ b/target/arm/tcg/a64.decode
@@ -774,6 +774,9 @@ CMHS_s 0111 1110 111 ..... 00111 1 ..... ..... @rrr_d
CMTST_s 0101 1110 111 ..... 10001 1 ..... ..... @rrr_d
CMEQ_s 0111 1110 111 ..... 10001 1 ..... ..... @rrr_d
+SQDMULH_s 0101 1110 ..1 ..... 10110 1 ..... ..... @rrr_e
+SQRDMULH_s 0111 1110 ..1 ..... 10110 1 ..... ..... @rrr_e
+
### Advanced SIMD scalar pairwise
FADDP_s 0101 1110 0011 0000 1101 10 ..... ..... @rr_h
@@ -931,6 +934,9 @@ PMUL_v 0.10 1110 001 ..... 10011 1 ..... ..... @qrrr_b
MLA_v 0.00 1110 ..1 ..... 10010 1 ..... ..... @qrrr_e
MLS_v 0.10 1110 ..1 ..... 10010 1 ..... ..... @qrrr_e
+SQDMULH_v 0.00 1110 ..1 ..... 10110 1 ..... ..... @qrrr_e
+SQRDMULH_v 0.10 1110 ..1 ..... 10110 1 ..... ..... @qrrr_e
+
### Advanced SIMD scalar x indexed element
FMUL_si 0101 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
@@ -949,6 +955,12 @@ FMULX_si 0111 1111 00 .. .... 1001 . 0 ..... ..... @rrx_h
FMULX_si 0111 1111 10 . ..... 1001 . 0 ..... ..... @rrx_s
FMULX_si 0111 1111 11 0 ..... 1001 . 0 ..... ..... @rrx_d
+SQDMULH_si 0101 1111 01 .. .... 1100 . 0 ..... ..... @rrx_h
+SQDMULH_si 0101 1111 10 .. .... 1100 . 0 ..... ..... @rrx_s
+
+SQRDMULH_si 0101 1111 01 .. .... 1101 . 0 ..... ..... @rrx_h
+SQRDMULH_si 0101 1111 10 . ..... 1101 . 0 ..... ..... @rrx_s
+
### Advanced SIMD vector x indexed element
FMUL_vi 0.00 1111 00 .. .... 1001 . 0 ..... ..... @qrrx_h
@@ -980,3 +992,9 @@ MLA_vi 0.10 1111 10 . ..... 0000 . 0 ..... ..... @qrrx_s
MLS_vi 0.10 1111 01 .. .... 0100 . 0 ..... ..... @qrrx_h
MLS_vi 0.10 1111 10 . ..... 0100 . 0 ..... ..... @qrrx_s
+
+SQDMULH_vi 0.00 1111 01 .. .... 1100 . 0 ..... ..... @qrrx_h
+SQDMULH_vi 0.00 1111 10 . ..... 1100 . 0 ..... ..... @qrrx_s
+
+SQRDMULH_vi 0.00 1111 01 .. .... 1101 . 0 ..... ..... @qrrx_h
+SQRDMULH_vi 0.00 1111 10 . ..... 1101 . 0 ..... ..... @qrrx_s
diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c
index c673b95ec7..14226c56cf 100644
--- a/target/arm/tcg/translate-a64.c
+++ b/target/arm/tcg/translate-a64.c
@@ -1350,6 +1350,14 @@ static bool do_gvec_fn3_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
return true;
}
+static bool do_gvec_fn3_no8_no64(DisasContext *s, arg_qrrr_e *a, GVecGen3Fn *fn)
+{
+ if (a->esz == MO_8) {
+ return false;
+ }
+ return do_gvec_fn3_no64(s, a, fn);
+}
+
static bool do_gvec_fn4(DisasContext *s, arg_qrrrr_e *a, GVecGen4Fn *fn)
{
if (!a->q && a->esz == MO_64) {
@@ -5167,6 +5175,25 @@ static const ENVScalar2 f_scalar_uqrshl = {
};
TRANS(UQRSHL_s, do_env_scalar2, a, &f_scalar_uqrshl)
+static bool do_env_scalar2_hs(DisasContext *s, arg_rrr_e *a,
+ const ENVScalar2 *f)
+{
+ if (a->esz == MO_16 || a->esz == MO_32) {
+ return do_env_scalar2(s, a, f);
+ }
+ return false;
+}
+
+static const ENVScalar2 f_scalar_sqdmulh = {
+ { NULL, gen_helper_neon_qdmulh_s16, gen_helper_neon_qdmulh_s32 }
+};
+TRANS(SQDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqdmulh)
+
+static const ENVScalar2 f_scalar_sqrdmulh = {
+ { NULL, gen_helper_neon_qrdmulh_s16, gen_helper_neon_qrdmulh_s32 }
+};
+TRANS(SQRDMULH_s, do_env_scalar2_hs, a, &f_scalar_sqrdmulh)
+
static bool do_cmop_d(DisasContext *s, arg_rrr_e *a, TCGCond cond)
{
if (fp_access_check(s)) {
@@ -5482,6 +5509,9 @@ TRANS(CMHS_v, do_cmop_v, a, TCG_COND_GEU)
TRANS(CMEQ_v, do_cmop_v, a, TCG_COND_EQ)
TRANS(CMTST_v, do_gvec_fn3, a, gen_gvec_cmtst)
+TRANS(SQDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqdmulh_qc)
+TRANS(SQRDMULH_v, do_gvec_fn3_no8_no64, a, gen_gvec_sqrdmulh_qc)
+
/*
* Advanced SIMD scalar/vector x indexed element
*/
@@ -5589,6 +5619,27 @@ static bool do_fmla_scalar_idx(DisasContext *s, arg_rrx_e *a, bool neg)
TRANS(FMLA_si, do_fmla_scalar_idx, a, false)
TRANS(FMLS_si, do_fmla_scalar_idx, a, true)
+static bool do_env_scalar2_idx_hs(DisasContext *s, arg_rrx_e *a,
+ const ENVScalar2 *f)
+{
+ if (a->esz < MO_16 || a->esz > MO_32) {
+ return false;
+ }
+ if (fp_access_check(s)) {
+ TCGv_i32 t0 = tcg_temp_new_i32();
+ TCGv_i32 t1 = tcg_temp_new_i32();
+
+ read_vec_element_i32(s, t0, a->rn, 0, a->esz);
+ read_vec_element_i32(s, t1, a->rm, a->idx, a->esz);
+ f->gen_bhs[a->esz](t0, tcg_env, t0, t1);
+ write_fp_sreg(s, a->rd, t0);
+ }
+ return true;
+}
+
+TRANS(SQDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqdmulh)
+TRANS(SQRDMULH_si, do_env_scalar2_idx_hs, a, &f_scalar_sqrdmulh)
+
static bool do_fp3_vector_idx(DisasContext *s, arg_qrrx_e *a,
gen_helper_gvec_3_ptr * const fns[3])
{
@@ -5719,6 +5770,33 @@ static bool do_mla_vector_idx(DisasContext *s, arg_qrrx_e *a, bool sub)
TRANS(MLA_vi, do_mla_vector_idx, a, false)
TRANS(MLS_vi, do_mla_vector_idx, a, true)
+static bool do_int3_qc_vector_idx(DisasContext *s, arg_qrrx_e *a,
+ gen_helper_gvec_4 * const fns[2])
+{
+ assert(a->esz == MO_16 || a->esz == MO_32);
+ if (fp_access_check(s)) {
+ tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
+ vec_full_reg_offset(s, a->rn),
+ vec_full_reg_offset(s, a->rm),
+ offsetof(CPUARMState, vfp.qc),
+ a->q ? 16 : 8, vec_full_reg_size(s),
+ a->idx, fns[a->esz - 1]);
+ }
+ return true;
+}
+
+static gen_helper_gvec_4 * const f_vector_idx_sqdmulh[2] = {
+ gen_helper_neon_sqdmulh_idx_h,
+ gen_helper_neon_sqdmulh_idx_s,
+};
+TRANS(SQDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqdmulh)
+
+static gen_helper_gvec_4 * const f_vector_idx_sqrdmulh[2] = {
+ gen_helper_neon_sqrdmulh_idx_h,
+ gen_helper_neon_sqrdmulh_idx_s,
+};
+TRANS(SQRDMULH_vi, do_int3_qc_vector_idx, a, f_vector_idx_sqrdmulh)
+
/*
* Advanced SIMD scalar pairwise
*/
@@ -9500,109 +9578,6 @@ static void disas_simd_scalar_three_reg_diff(DisasContext *s, uint32_t insn)
}
}
-/* AdvSIMD scalar three same
- * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
- * +-----+---+-----------+------+---+------+--------+---+------+------+
- * | 0 1 | U | 1 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
- * +-----+---+-----------+------+---+------+--------+---+------+------+
- */
-static void disas_simd_scalar_three_reg_same(DisasContext *s, uint32_t insn)
-{
- int rd = extract32(insn, 0, 5);
- int rn = extract32(insn, 5, 5);
- int opcode = extract32(insn, 11, 5);
- int rm = extract32(insn, 16, 5);
- int size = extract32(insn, 22, 2);
- bool u = extract32(insn, 29, 1);
- TCGv_i64 tcg_rd;
-
- switch (opcode) {
- case 0x16: /* SQDMULH, SQRDMULH (vector) */
- if (size != 1 && size != 2) {
- unallocated_encoding(s);
- return;
- }
- break;
- default:
- case 0x1: /* SQADD, UQADD */
- case 0x5: /* SQSUB, UQSUB */
- case 0x6: /* CMGT, CMHI */
- case 0x7: /* CMGE, CMHS */
- case 0x8: /* SSHL, USHL */
- case 0x9: /* SQSHL, UQSHL */
- case 0xa: /* SRSHL, URSHL */
- case 0xb: /* SQRSHL, UQRSHL */
- case 0x10: /* ADD, SUB (vector) */
- case 0x11: /* CMTST, CMEQ */
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- tcg_rd = tcg_temp_new_i64();
-
- if (size == 3) {
- g_assert_not_reached();
- } else {
- /* Do a single operation on the lowest element in the vector.
- * We use the standard Neon helpers and rely on 0 OP 0 == 0 with
- * no side effects for all these operations.
- * OPTME: special-purpose helpers would avoid doing some
- * unnecessary work in the helper for the 8 and 16 bit cases.
- */
- NeonGenTwoOpEnvFn *genenvfn = NULL;
- void (*genfn)(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_i64, MemOp) = NULL;
-
- switch (opcode) {
- case 0x16: /* SQDMULH, SQRDMULH */
- {
- static NeonGenTwoOpEnvFn * const fns[2][2] = {
- { gen_helper_neon_qdmulh_s16, gen_helper_neon_qrdmulh_s16 },
- { gen_helper_neon_qdmulh_s32, gen_helper_neon_qrdmulh_s32 },
- };
- assert(size == 1 || size == 2);
- genenvfn = fns[size - 1][u];
- break;
- }
- default:
- case 0x1: /* SQADD, UQADD */
- case 0x5: /* SQSUB, UQSUB */
- case 0x9: /* SQSHL, UQSHL */
- case 0xb: /* SQRSHL, UQRSHL */
- g_assert_not_reached();
- }
-
- if (genenvfn) {
- TCGv_i32 tcg_rn = tcg_temp_new_i32();
- TCGv_i32 tcg_rm = tcg_temp_new_i32();
-
- read_vec_element_i32(s, tcg_rn, rn, 0, size);
- read_vec_element_i32(s, tcg_rm, rm, 0, size);
- genenvfn(tcg_rn, tcg_env, tcg_rn, tcg_rm);
- tcg_gen_extu_i32_i64(tcg_rd, tcg_rn);
- } else {
- TCGv_i64 tcg_rn = tcg_temp_new_i64();
- TCGv_i64 tcg_rm = tcg_temp_new_i64();
- TCGv_i64 qc = tcg_temp_new_i64();
-
- read_vec_element(s, tcg_rn, rn, 0, size | (u ? 0 : MO_SIGN));
- read_vec_element(s, tcg_rm, rm, 0, size | (u ? 0 : MO_SIGN));
- tcg_gen_ld_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
- genfn(tcg_rd, qc, tcg_rn, tcg_rm, size);
- tcg_gen_st_i64(qc, tcg_env, offsetof(CPUARMState, vfp.qc));
- if (!u) {
- /* Truncate signed 64-bit result for writeback. */
- tcg_gen_ext_i64(tcg_rd, tcg_rd, size);
- }
- }
- }
-
- write_fp_dreg(s, rd, tcg_rd);
-}
-
/* AdvSIMD scalar three same extra
* 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
* +-----+---+-----------+------+---+------+---+--------+---+----+----+
@@ -10940,94 +10915,6 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
}
}
-/* Integer op subgroup of C3.6.16. */
-static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
-{
- int is_q = extract32(insn, 30, 1);
- int u = extract32(insn, 29, 1);
- int size = extract32(insn, 22, 2);
- int opcode = extract32(insn, 11, 5);
- int rm = extract32(insn, 16, 5);
- int rn = extract32(insn, 5, 5);
- int rd = extract32(insn, 0, 5);
-
- switch (opcode) {
- case 0x16: /* SQDMULH, SQRDMULH */
- if (size == 0 || size == 3) {
- unallocated_encoding(s);
- return;
- }
- break;
- default:
- if (size == 3 && !is_q) {
- unallocated_encoding(s);
- return;
- }
- break;
-
- case 0x0: /* SHADD, UHADD */
- case 0x01: /* SQADD, UQADD */
- case 0x02: /* SRHADD, URHADD */
- case 0x04: /* SHSUB, UHSUB */
- case 0x05: /* SQSUB, UQSUB */
- case 0x06: /* CMGT, CMHI */
- case 0x07: /* CMGE, CMHS */
- case 0x08: /* SSHL, USHL */
- case 0x09: /* SQSHL, UQSHL */
- case 0x0a: /* SRSHL, URSHL */
- case 0x0b: /* SQRSHL, UQRSHL */
- case 0x0c: /* SMAX, UMAX */
- case 0x0d: /* SMIN, UMIN */
- case 0x0e: /* SABD, UABD */
- case 0x0f: /* SABA, UABA */
- case 0x10: /* ADD, SUB */
- case 0x11: /* CMTST, CMEQ */
- case 0x12: /* MLA, MLS */
- case 0x13: /* MUL, PMUL */
- unallocated_encoding(s);
- return;
- }
-
- if (!fp_access_check(s)) {
- return;
- }
-
- switch (opcode) {
- case 0x16: /* SQDMULH, SQRDMULH */
- if (u) {
- gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqrdmulh_qc, size);
- } else {
- gen_gvec_fn3(s, is_q, rd, rn, rm, gen_gvec_sqdmulh_qc, size);
- }
- return;
- }
- g_assert_not_reached();
-}
-
-/* AdvSIMD three same
- * 31 30 29 28 24 23 22 21 20 16 15 11 10 9 5 4 0
- * +---+---+---+-----------+------+---+------+--------+---+------+------+
- * | 0 | Q | U | 0 1 1 1 0 | size | 1 | Rm | opcode | 1 | Rn | Rd |
- * +---+---+---+-----------+------+---+------+--------+---+------+------+
- */
-static void disas_simd_three_reg_same(DisasContext *s, uint32_t insn)
-{
- int opcode = extract32(insn, 11, 5);
-
- switch (opcode) {
- default:
- disas_simd_3same_int(s, insn);
- break;
- case 0x3: /* logic ops */
- case 0x14: /* SMAXP, UMAXP */
- case 0x15: /* SMINP, UMINP */
- case 0x17: /* ADDP */
- case 0x18 ... 0x31: /* floating point ops */
- unallocated_encoding(s);
- break;
- }
-}
-
/* AdvSIMD three same extra
* 31 30 29 28 24 23 22 21 20 16 15 14 11 10 9 5 4 0
* +---+---+---+-----------+------+---+------+---+--------+---+----+----+
@@ -12214,9 +12101,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
case 0x0b: /* SQDMULL, SQDMULL2 */
is_long = true;
break;
- case 0x0c: /* SQDMULH */
- case 0x0d: /* SQRDMULH */
- break;
case 0x1d: /* SQRDMLAH */
case 0x1f: /* SQRDMLSH */
if (!dc_isar_feature(aa64_rdm, s)) {
@@ -12278,6 +12162,8 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
case 0x05: /* FMLS */
case 0x08: /* MUL */
case 0x09: /* FMUL */
+ case 0x0c: /* SQDMULH */
+ case 0x0d: /* SQRDMULH */
case 0x10: /* MLA */
case 0x14: /* MLS */
case 0x18: /* FMLAL2 */
@@ -12683,7 +12569,6 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
*/
static const AArch64DecodeTable data_proc_simd[] = {
/* pattern , mask , fn */
- { 0x0e200400, 0x9f200400, disas_simd_three_reg_same },
{ 0x0e008400, 0x9f208400, disas_simd_three_reg_same_extra },
{ 0x0e200000, 0x9f200c00, disas_simd_three_reg_diff },
{ 0x0e200800, 0x9f3e0c00, disas_simd_two_reg_misc },
@@ -12695,7 +12580,6 @@ static const AArch64DecodeTable data_proc_simd[] = {
{ 0x0e000000, 0xbf208c00, disas_simd_tb },
{ 0x0e000800, 0xbf208c00, disas_simd_zip_trn },
{ 0x2e000000, 0xbf208400, disas_simd_ext },
- { 0x5e200400, 0xdf200400, disas_simd_scalar_three_reg_same },
{ 0x5e008400, 0xdf208400, disas_simd_scalar_three_reg_same_extra },
{ 0x5e200000, 0xdf200c00, disas_simd_scalar_three_reg_diff },
{ 0x5e200800, 0xdf3e0c00, disas_simd_scalar_two_reg_misc },
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index d8e96386be..b05922b425 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -311,6 +311,38 @@ void HELPER(neon_sqrdmulh_h)(void *vd, void *vn, void *vm,
clear_tail(d, opr_sz, simd_maxsz(desc));
}
+void HELPER(neon_sqdmulh_idx_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+
+ for (i = 0; i < opr_sz / 2; i += 16 / 2) {
+ int16_t mm = m[i];
+ for (j = 0; j < 16 / 2; ++j) {
+ d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, false, vq);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_idx_h)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int16_t *d = vd, *n = vn, *m = (int16_t *)vm + H2(idx);
+
+ for (i = 0; i < opr_sz / 2; i += 16 / 2) {
+ int16_t mm = m[i];
+ for (j = 0; j < 16 / 2; ++j) {
+ d[i + j] = do_sqrdmlah_h(n[i + j], mm, 0, false, true, vq);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
void HELPER(sve2_sqrdmlah_h)(void *vd, void *vn, void *vm,
void *va, uint32_t desc)
{
@@ -474,6 +506,38 @@ void HELPER(neon_sqrdmulh_s)(void *vd, void *vn, void *vm,
clear_tail(d, opr_sz, simd_maxsz(desc));
}
+void HELPER(neon_sqdmulh_idx_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+
+ for (i = 0; i < opr_sz / 4; i += 16 / 4) {
+ int32_t mm = m[i];
+ for (j = 0; j < 16 / 4; ++j) {
+ d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, false, vq);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
+void HELPER(neon_sqrdmulh_idx_s)(void *vd, void *vn, void *vm,
+ void *vq, uint32_t desc)
+{
+ intptr_t i, j, opr_sz = simd_oprsz(desc);
+ int idx = simd_data(desc);
+ int32_t *d = vd, *n = vn, *m = (int32_t *)vm + H4(idx);
+
+ for (i = 0; i < opr_sz / 4; i += 16 / 4) {
+ int32_t mm = m[i];
+ for (j = 0; j < 16 / 4; ++j) {
+ d[i + j] = do_sqrdmlah_s(n[i + j], mm, 0, false, true, vq);
+ }
+ }
+ clear_tail(d, opr_sz, simd_maxsz(desc));
+}
+
void HELPER(sve2_sqrdmlah_s)(void *vd, void *vn, void *vm,
void *va, uint32_t desc)
{
--
2.34.1
next prev parent reply other threads:[~2024-05-24 23:27 UTC|newest]
Thread overview: 114+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-05-24 23:20 [PATCH v2 00/67] target/arm: Convert a64 advsimd to decodetree (part 1) Richard Henderson
2024-05-24 23:20 ` [PATCH v2 01/67] target/arm: Add neoverse-n1 to qemu-arm (DO NOT MERGE) Richard Henderson
2024-05-24 23:20 ` [PATCH v2 02/67] target/arm: Use PLD, PLDW, PLI not NOP for t32 Richard Henderson
2024-05-28 13:13 ` Peter Maydell
2024-05-24 23:20 ` [PATCH v2 03/67] target/arm: Reject incorrect operands to PLD, PLDW, PLI Richard Henderson
2024-05-28 13:18 ` Peter Maydell
2024-05-28 17:36 ` Richard Henderson
2024-05-29 13:32 ` Peter Maydell
2024-05-29 17:39 ` Richard Henderson
2024-05-24 23:20 ` [PATCH v2 04/67] target/arm: Zero-extend writeback for fp16 FCVTZS (scalar, integer) Richard Henderson
2024-05-28 12:48 ` Peter Maydell
2024-05-24 23:20 ` [PATCH v2 05/67] target/arm: Fix decode of FMOV (hp) vs MOVI Richard Henderson
2024-05-28 12:34 ` Peter Maydell
2024-05-24 23:20 ` [PATCH v2 06/67] target/arm: Verify sz=0 for Advanced SIMD scalar pairwise (fp16) Richard Henderson
2024-05-28 12:25 ` Peter Maydell
2024-05-24 23:20 ` [PATCH v2 07/67] target/arm: Split out gengvec.c Richard Henderson
2024-05-24 23:20 ` [PATCH v2 08/67] target/arm: Split out gengvec64.c Richard Henderson
2024-05-24 23:20 ` [PATCH v2 09/67] target/arm: Convert Cryptographic AES to decodetree Richard Henderson
2024-05-24 23:20 ` [PATCH v2 10/67] target/arm: Convert Cryptographic 3-register SHA " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 11/67] target/arm: Convert Cryptographic 2-register " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 12/67] target/arm: Convert Cryptographic 3-register SHA512 " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 13/67] target/arm: Convert Cryptographic 2-register " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 14/67] target/arm: Convert Cryptographic 4-register " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 15/67] target/arm: Convert Cryptographic 3-register, imm2 " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 16/67] target/arm: Convert XAR " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 17/67] target/arm: Convert Advanced SIMD copy " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 18/67] target/arm: Convert FMULX " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 19/67] target/arm: Convert FADD, FSUB, FDIV, FMUL " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 20/67] target/arm: Convert FMAX, FMIN, FMAXNM, FMINNM " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 21/67] target/arm: Introduce vfp_load_reg16 Richard Henderson
2024-05-28 12:22 ` Peter Maydell
2024-05-24 23:20 ` [PATCH v2 22/67] target/arm: Expand vfp neg and abs inline Richard Henderson
2024-05-24 23:20 ` [PATCH v2 23/67] target/arm: Convert FNMUL to decodetree Richard Henderson
2024-05-24 23:20 ` [PATCH v2 24/67] target/arm: Convert FMLA, FMLS " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 25/67] target/arm: Convert FCMEQ, FCMGE, FCMGT, FACGE, FACGT " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 26/67] target/arm: Convert FABD " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 27/67] target/arm: Convert FRECPS, FRSQRTS " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 28/67] target/arm: Convert FADDP " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 29/67] target/arm: Convert FMAXP, FMINP, FMAXNMP, FMINNMP " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 30/67] target/arm: Use gvec for neon faddp, fmaxp, fminp Richard Henderson
2024-05-24 23:20 ` [PATCH v2 31/67] target/arm: Convert ADDP to decodetree Richard Henderson
2024-05-24 23:20 ` [PATCH v2 32/67] target/arm: Use gvec for neon padd Richard Henderson
2024-05-24 23:20 ` [PATCH v2 33/67] target/arm: Convert SMAXP, SMINP, UMAXP, UMINP to decodetree Richard Henderson
2024-05-24 23:20 ` [PATCH v2 34/67] target/arm: Use gvec for neon pmax, pmin Richard Henderson
2024-05-24 23:20 ` [PATCH v2 35/67] target/arm: Convert FMLAL, FMLSL to decodetree Richard Henderson
2024-05-24 23:20 ` [PATCH v2 36/67] target/arm: Convert disas_simd_3same_logic " Richard Henderson
2024-05-24 23:20 ` [PATCH v2 37/67] target/arm: Improve vector UQADD, UQSUB, SQADD, SQSUB Richard Henderson
2024-05-28 15:24 ` Peter Maydell
2024-05-24 23:20 ` [PATCH v2 38/67] target/arm: Convert SUQADD and USQADD to gvec Richard Henderson
2024-05-28 15:37 ` Peter Maydell
2024-05-28 17:41 ` Richard Henderson
2024-05-24 23:20 ` [PATCH v2 39/67] target/arm: Inline scalar SUQADD and USQADD Richard Henderson
2024-05-28 15:40 ` Peter Maydell
2024-05-24 23:20 ` [PATCH v2 40/67] target/arm: Inline scalar SQADD, UQADD, SQSUB, UQSUB Richard Henderson
2024-05-28 15:42 ` Peter Maydell
2024-05-24 23:20 ` [PATCH v2 41/67] target/arm: Convert SQADD, SQSUB, UQADD, UQSUB to decodetree Richard Henderson
2024-05-28 15:44 ` Peter Maydell
2024-05-24 23:20 ` [PATCH v2 42/67] target/arm: Convert SUQADD, USQADD " Richard Henderson
2024-05-28 15:44 ` Peter Maydell
2024-05-24 23:20 ` [PATCH v2 43/67] target/arm: Convert SSHL, USHL " Richard Henderson
2024-05-28 15:46 ` Peter Maydell
2024-05-24 23:20 ` [PATCH v2 44/67] target/arm: Convert SRSHL and URSHL (register) to gvec Richard Henderson
2024-05-28 15:51 ` Peter Maydell
2024-05-28 17:30 ` Richard Henderson
2024-05-24 23:20 ` [PATCH v2 45/67] target/arm: Convert SRSHL, URSHL to decodetree Richard Henderson
2024-05-28 15:51 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 46/67] target/arm: Convert SQSHL and UQSHL (register) to gvec Richard Henderson
2024-05-28 15:53 ` Peter Maydell
2024-05-28 17:31 ` Richard Henderson
2024-05-24 23:21 ` [PATCH v2 47/67] target/arm: Convert SQSHL, UQSHL to decodetree Richard Henderson
2024-05-28 15:53 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 48/67] target/arm: Convert SQRSHL and UQRSHL (register) to gvec Richard Henderson
2024-05-28 15:54 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 49/67] target/arm: Convert SQRSHL, UQRSHL to decodetree Richard Henderson
2024-05-28 15:55 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 50/67] target/arm: Convert ADD, SUB (vector) " Richard Henderson
2024-05-28 15:56 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 51/67] target/arm: Convert CMGT, CMHI, CMGE, CMHS, CMTST, CMEQ " Richard Henderson
2024-05-28 15:57 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 52/67] target/arm: Use TCG_COND_TSTNE in gen_cmtst_{i32, i64} Richard Henderson
2024-05-28 15:58 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 53/67] target/arm: Use TCG_COND_TSTNE in gen_cmtst_vec Richard Henderson
2024-05-28 15:59 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 54/67] target/arm: Convert SHADD, UHADD to gvec Richard Henderson
2024-05-28 16:01 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 55/67] target/arm: Convert SHADD, UHADD to decodetree Richard Henderson
2024-05-28 16:01 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 56/67] target/arm: Convert SHSUB, UHSUB to gvec Richard Henderson
2024-05-28 16:02 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 57/67] target/arm: Convert SHSUB, UHSUB to decodetree Richard Henderson
2024-05-28 16:02 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 58/67] target/arm: Convert SRHADD, URHADD to gvec Richard Henderson
2024-05-28 16:03 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 59/67] target/arm: Convert SRHADD, URHADD to decodetree Richard Henderson
2024-05-28 16:04 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 60/67] target/arm: Convert SMAX, SMIN, UMAX, UMIN " Richard Henderson
2024-05-28 16:04 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 61/67] target/arm: Convert SABA, SABD, UABA, UABD " Richard Henderson
2024-05-28 16:05 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 62/67] target/arm: Convert MUL, PMUL " Richard Henderson
2024-05-28 16:06 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 63/67] target/arm: Convert MLA, MLS " Richard Henderson
2024-05-28 16:07 ` Peter Maydell
2024-05-24 23:21 ` [PATCH v2 64/67] target/arm: Tidy SQDMULH, SQRDMULH (vector) Richard Henderson
2024-05-28 16:08 ` Peter Maydell
2024-05-24 23:21 ` Richard Henderson [this message]
2024-05-28 16:10 ` [PATCH v2 65/67] target/arm: Convert SQDMULH, SQRDMULH to decodetree Peter Maydell
2024-05-28 17:27 ` Richard Henderson
2024-05-24 23:21 ` [PATCH v2 66/67] target/arm: Convert FMADD, FMSUB, FNMADD, FNMSUB " Richard Henderson
2024-05-28 16:15 ` Peter Maydell
2024-05-28 17:31 ` Richard Henderson
2024-05-24 23:21 ` [PATCH v2 67/67] target/arm: Convert FCSEL " Richard Henderson
2024-05-28 16:16 ` Peter Maydell
2024-05-28 16:20 ` [PATCH v2 00/67] target/arm: Convert a64 advsimd to decodetree (part 1) Peter Maydell
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240524232121.284515-66-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=qemu-arm@nongnu.org \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).