From mboxrd@z Thu Jan 1 00:00:00 1970
Received: from eggs.gnu.org ([2001:4830:134:3::10]:33585)
	by lists.gnu.org with esmtp (Exim 4.71) (envelope-from )
	id 1WBtIm-0005nQ-Bh for qemu-devel@nongnu.org; Fri, 07 Feb 2014 16:49:37 -0500
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from ) id 1WBtIl-0004vD-9n
	for qemu-devel@nongnu.org; Fri, 07 Feb 2014 16:49:36 -0500
Received: from mnementh.archaic.org.uk ([2001:8b0:1d0::1]:45625)
	by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from )
	id 1WBtIk-0004u7-VR for qemu-devel@nongnu.org; Fri, 07 Feb 2014 16:49:35 -0500
From: Peter Maydell
Date: Fri, 7 Feb 2014 21:49:17 +0000
Message-Id: <1391809763-11251-3-git-send-email-peter.maydell@linaro.org>
In-Reply-To: <1391809763-11251-1-git-send-email-peter.maydell@linaro.org>
References: <1391809763-11251-1-git-send-email-peter.maydell@linaro.org>
Subject: [Qemu-devel] [PATCH 2/8] target-arm: A64: Implement long vector x indexed insns
List-Id:
List-Unsubscribe: ,
List-Archive:
List-Post:
List-Help:
List-Subscribe: ,
To: qemu-devel@nongnu.org
Cc: Peter Crosthwaite, patches@linaro.org, Michael Matz, Alexander Graf,
	Claudio Fontana, Dirk Mueller, Will Newton, Laurent Desnogues,
	Alex Bennée, kvmarm@lists.cs.columbia.edu, Christoffer Dall,
	Richard Henderson

Implement the 'long' operations in the vector x indexed
element category.

Signed-off-by: Peter Maydell
---
 target-arm/translate-a64.c | 144 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 139 insertions(+), 5 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index b7f1ecf..64b8c86 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7906,11 +7906,6 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
         }
     }

-    if (is_long) {
-        unsupported_encoding(s, insn);
-        return;
-    }
-
     if (is_fp) {
         fpst = get_fpstatus_ptr();
     } else {
@@ -8052,6 +8047,145 @@ static void disas_simd_indexed_vector(DisasContext *s, uint32_t insn)
         }
     } else {
         /* long ops: 16x16->32 or 32x32->64 */
+        TCGv_i64 tcg_res[2];
+        int pass;
+        bool satop = extract32(opcode, 0, 1);
+        TCGMemOp memop = MO_32;
+
+        if (satop || !u) {
+            memop |= MO_SIGN;
+        }
+
+        if (size == 2) {
+            TCGv_i64 tcg_idx = tcg_temp_new_i64();
+
+            read_vec_element(s, tcg_idx, rm, index, memop);
+
+            for (pass = 0; pass < 2; pass++) {
+                TCGv_i64 tcg_op = tcg_temp_new_i64();
+                TCGv_i64 tcg_passres;
+
+                read_vec_element(s, tcg_op, rn, pass + (is_q * 2), memop);
+
+                tcg_res[pass] = tcg_temp_new_i64();
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    /* Non-accumulating ops */
+                    tcg_passres = tcg_res[pass];
+                } else {
+                    tcg_passres = tcg_temp_new_i64();
+                }
+
+                tcg_gen_mul_i64(tcg_passres, tcg_op, tcg_idx);
+                tcg_temp_free_i64(tcg_op);
+
+                if (satop) {
+                    /* saturating, doubling */
+                    gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+                                                      tcg_passres, tcg_passres);
+                }
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    continue;
+                }
+
+                /* Accumulating op: handle accumulate step */
+                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+                switch (opcode) {
+                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+                    tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+                    break;
+                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+                    tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+                    break;
+                case 0x7: /* SQDMLSL, SQDMLSL2 */
+                    tcg_gen_neg_i64(tcg_passres, tcg_passres);
+                    /* fall through */
+                case 0x3: /* SQDMLAL, SQDMLAL2 */
+                    gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+                                                      tcg_res[pass],
+                                                      tcg_passres);
+                    break;
+                default:
+                    g_assert_not_reached();
+                }
+                tcg_temp_free_i64(tcg_passres);
+            }
+            tcg_temp_free_i64(tcg_idx);
+        } else {
+            TCGv_i32 tcg_idx = tcg_temp_new_i32();
+
+            assert(size == 1);
+            read_vec_element_i32(s, tcg_idx, rm, index, size);
+
+            /* The simplest way to handle the 16x16 indexed ops is to duplicate
+             * the index into both halves of the 32 bit tcg_idx and then use
+             * the usual Neon helpers.
+             */
+            tcg_gen_deposit_i32(tcg_idx, tcg_idx, tcg_idx, 16, 16);
+
+            for (pass = 0; pass < 2; pass++) {
+                TCGv_i32 tcg_op = tcg_temp_new_i32();
+                TCGv_i64 tcg_passres;
+
+                read_vec_element_i32(s, tcg_op, rn, pass + (is_q * 2), MO_32);
+                tcg_res[pass] = tcg_temp_new_i64();
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    /* Non-accumulating ops */
+                    tcg_passres = tcg_res[pass];
+                } else {
+                    tcg_passres = tcg_temp_new_i64();
+                }
+
+                if (memop & MO_SIGN) {
+                    gen_helper_neon_mull_s16(tcg_passres, tcg_op, tcg_idx);
+                } else {
+                    gen_helper_neon_mull_u16(tcg_passres, tcg_op, tcg_idx);
+                }
+                if (satop) {
+                    gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+                                                      tcg_passres, tcg_passres);
+                }
+                tcg_temp_free_i32(tcg_op);
+
+                if (opcode == 0xa || opcode == 0xb) {
+                    continue;
+                }
+
+                /* Accumulating op: handle accumulate step */
+                read_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+
+                switch (opcode) {
+                case 0x2: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
+                    gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
+                                             tcg_passres);
+                    break;
+                case 0x6: /* SMLSL, SMLSL2, UMLSL, UMLSL2 */
+                    gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
+                                             tcg_passres);
+                    break;
+                case 0x7: /* SQDMLSL, SQDMLSL2 */
+                    gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
+                    /* fall through */
+                case 0x3: /* SQDMLAL, SQDMLAL2 */
+                    gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+                                                      tcg_res[pass],
+                                                      tcg_passres);
+                    break;
+                default:
+                    g_assert_not_reached();
+                }
+                tcg_temp_free_i64(tcg_passres);
+            }
+            tcg_temp_free_i32(tcg_idx);
+        }
+
+        for (pass = 0; pass < 2; pass++) {
+            write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+            tcg_temp_free_i64(tcg_res[pass]);
+        }
     }

     if (!TCGV_IS_UNUSED_PTR(fpst)) {
-- 
1.8.5
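
[Editor's note, appended for clarity and not part of the original posting: the
subtlest part of the long ops above is the SQDMULL/SQDMLAL/SQDMLSL path, where
the widening product is doubled by passing it to the saturating-add helper as
both addends, negated for the subtracting form, and then saturating-added into
the 64-bit destination lane. The standalone C sketch below models that
per-lane arithmetic for the 32x32->64 case; the names sat_add_s64 and
sqdml_lane are invented for illustration and are not QEMU APIs.]

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>

/* Saturating signed 64-bit add; sets *qc on saturation (like FPSR.QC). */
static int64_t sat_add_s64(int64_t a, int64_t b, bool *qc)
{
    int64_t r = (int64_t)((uint64_t)a + (uint64_t)b);

    if (((a ^ r) & (b ^ r)) < 0) {   /* result sign differs from both inputs */
        *qc = true;
        r = (r < 0) ? INT64_MAX : INT64_MIN;
    }
    return r;
}

/*
 * One 32x32->64 lane of SQDMULL / SQDMLAL / SQDMLSL by element:
 * widening signed multiply by the indexed element, saturating doubling
 * (modelled as a saturating self-add, mirroring the TCG sequence in the
 * patch), optional negation for the subtracting form, then a saturating
 * accumulate into the destination lane.
 */
static int64_t sqdml_lane(int64_t acc, int32_t elem, int32_t idx_elem,
                          bool accumulate, bool subtract, bool *qc)
{
    int64_t prod = (int64_t)elem * (int64_t)idx_elem;

    prod = sat_add_s64(prod, prod, qc);             /* saturating doubling */

    if (!accumulate) {
        return prod;                                /* SQDMULL */
    }
    if (subtract) {
        /* two's-complement negate, wrapping like tcg_gen_neg_i64 */
        prod = (int64_t)(0 - (uint64_t)prod);
    }
    return sat_add_s64(acc, prod, qc);              /* SQDMLAL / SQDMLSL */
}

int main(void)
{
    bool qc = false;
    /* INT32_MIN * INT32_MIN doubled overflows int64_t, so SQDMULL saturates. */
    int64_t r = sqdml_lane(0, INT32_MIN, INT32_MIN, false, false, &qc);

    printf("0x%016llx qc=%d\n", (unsigned long long)r, qc);
    return 0;
}

Compiled with any C99 compiler this prints 0x7fffffffffffffff qc=1, the
most-negative-times-most-negative corner case that the saturating helper is
there to catch.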