From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Cc: "Peter Crosthwaite" <peter.crosthwaite@xilinx.com>,
patches@linaro.org, "Michael Matz" <matz@suse.de>,
"Alexander Graf" <agraf@suse.de>,
"Claudio Fontana" <claudio.fontana@linaro.org>,
"Dirk Mueller" <dmueller@suse.de>,
"Will Newton" <will.newton@linaro.org>,
"Laurent Desnogues" <laurent.desnogues@gmail.com>,
"Alex Bennée" <alex.bennee@linaro.org>,
kvmarm@lists.cs.columbia.edu,
"Christoffer Dall" <christoffer.dall@linaro.org>,
"Richard Henderson" <rth@twiddle.net>
Subject: [Qemu-devel] [PATCH 2/6] target-arm: A64: Add most remaining three-reg-diff widening ops
Date: Sun, 16 Feb 2014 18:21:07 +0000 [thread overview]
Message-ID: <1392574872-28725-3-git-send-email-peter.maydell@linaro.org> (raw)
In-Reply-To: <1392574872-28725-1-git-send-email-peter.maydell@linaro.org>
Add the remainder of the 64x64->128 operations in the three-reg-diff
category except for PMULL, PMULL2.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target-arm/translate-a64.c | 109 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 88 insertions(+), 21 deletions(-)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index d4e7a20..809fbdb 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -75,8 +75,10 @@ typedef struct AArch64DecodeTable {
/* Function prototype for gen_ functions for calling Neon helpers */
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
typedef void NeonGenNarrowFn(TCGv_i32, TCGv_i64);
typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
+typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
@@ -6879,6 +6881,24 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
}
}
+/* Emit a "long" add (or, when is_sub is set, subtract) of tcg_op1 and tcg_op2
+ * into tcg_res: done in TCGv_i64 on vector lanes twice the width given by size.
+ */
+static void gen_neon_addl(int size, bool is_sub, TCGv_i64 tcg_res,
+ TCGv_i64 tcg_op1, TCGv_i64 tcg_op2)
+{
+ static NeonGenTwo64OpFn * const fns[3][2] = { /* indexed [size][is_sub] */
+ { gen_helper_neon_addl_u16, gen_helper_neon_subl_u16 },
+ { gen_helper_neon_addl_u32, gen_helper_neon_subl_u32 },
+ { tcg_gen_add_i64, tcg_gen_sub_i64 }, /* size 2: direct TCG i64 add/sub */
+ };
+ NeonGenTwo64OpFn *genfn;
+ assert(size < 3); /* fns[] has exactly three rows */
+
+ genfn = fns[size][is_sub];
+ genfn(tcg_res, tcg_op1, tcg_op2);
+}
+
static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
int opcode, int rd, int rn, int rm)
{
@@ -6934,6 +6954,12 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
}
switch (opcode) {
+ case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+ tcg_gen_add_i64(tcg_passres, tcg_op1, tcg_op2);
+ break;
+ case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
+ tcg_gen_sub_i64(tcg_passres, tcg_op1, tcg_op2);
+ break;
case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
{
@@ -6954,15 +6980,31 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
case 12: /* UMULL, UMULL2, SMULL, SMULL2 */
tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
break;
+ case 9: /* SQDMLAL, SQDMLAL2 */
+ case 11: /* SQDMLSL, SQDMLSL2 */
+ case 13: /* SQDMULL, SQDMULL2 */
+ tcg_gen_mul_i64(tcg_passres, tcg_op1, tcg_op2);
+ gen_helper_neon_addl_saturate_s64(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ break;
default:
g_assert_not_reached();
}
- if (accop > 0) {
+ if (opcode == 9 || opcode == 11) {
+ /* saturating accumulate ops */
+ if (accop < 0) {
+ tcg_gen_neg_i64(tcg_passres, tcg_passres);
+ }
+ gen_helper_neon_addl_saturate_s64(tcg_res[pass], cpu_env,
+ tcg_res[pass], tcg_passres);
+ } else if (accop > 0) {
tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
- tcg_temp_free_i64(tcg_passres);
} else if (accop < 0) {
tcg_gen_sub_i64(tcg_res[pass], tcg_res[pass], tcg_passres);
+ }
+
+ if (accop != 0) {
tcg_temp_free_i64(tcg_passres);
}
@@ -6987,6 +7029,23 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
}
switch (opcode) {
+ case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
+ case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
+ {
+ TCGv_i64 tcg_op2_64 = tcg_temp_new_i64();
+ static NeonGenWidenFn * const widenfns[2][2] = {
+ { gen_helper_neon_widen_s8, gen_helper_neon_widen_u8 },
+ { gen_helper_neon_widen_s16, gen_helper_neon_widen_u16 },
+ };
+ NeonGenWidenFn *widenfn = widenfns[size][is_u];
+
+ widenfn(tcg_op2_64, tcg_op2);
+ widenfn(tcg_passres, tcg_op1);
+ gen_neon_addl(size, (opcode == 2), tcg_passres,
+ tcg_passres, tcg_op2_64);
+ tcg_temp_free_i64(tcg_op2_64);
+ break;
+ }
case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
if (size == 0) {
@@ -7020,28 +7079,32 @@ static void handle_3rd_widening(DisasContext *s, int is_q, int is_u, int size,
}
}
break;
+ case 9: /* SQDMLAL, SQDMLAL2 */
+ case 11: /* SQDMLSL, SQDMLSL2 */
+ case 13: /* SQDMULL, SQDMULL2 */
+ assert(size == 1);
+ gen_helper_neon_mull_s16(tcg_passres, tcg_op1, tcg_op2);
+ gen_helper_neon_addl_saturate_s32(tcg_passres, cpu_env,
+ tcg_passres, tcg_passres);
+ break;
default:
g_assert_not_reached();
}
tcg_temp_free_i32(tcg_op1);
tcg_temp_free_i32(tcg_op2);
- if (accop > 0) {
- if (size == 0) {
- gen_helper_neon_addl_u16(tcg_res[pass], tcg_res[pass],
- tcg_passres);
- } else {
- gen_helper_neon_addl_u32(tcg_res[pass], tcg_res[pass],
- tcg_passres);
- }
- tcg_temp_free_i64(tcg_passres);
- } else if (accop < 0) {
- if (size == 0) {
- gen_helper_neon_subl_u16(tcg_res[pass], tcg_res[pass],
- tcg_passres);
+ if (accop != 0) {
+ if (opcode == 9 || opcode == 11) {
+ /* saturating accumulate ops */
+ if (accop < 0) {
+ gen_helper_neon_negl_u32(tcg_passres, tcg_passres);
+ }
+ gen_helper_neon_addl_saturate_s32(tcg_res[pass], cpu_env,
+ tcg_res[pass],
+ tcg_passres);
} else {
- gen_helper_neon_subl_u32(tcg_res[pass], tcg_res[pass],
- tcg_passres);
+ gen_neon_addl(size, (accop < 0), tcg_res[pass],
+ tcg_res[pass], tcg_passres);
}
tcg_temp_free_i64(tcg_passres);
}
@@ -7091,19 +7154,23 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
/* 128 x 128 -> 64 */
unsupported_encoding(s, insn);
break;
+ case 14: /* PMULL, PMULL2 */
+ if (is_u || size == 1 || size == 2) {
+ unallocated_encoding(s);
+ return;
+ }
+ unsupported_encoding(s, insn);
+ break;
case 9: /* SQDMLAL, SQDMLAL2 */
case 11: /* SQDMLSL, SQDMLSL2 */
case 13: /* SQDMULL, SQDMULL2 */
- case 14: /* PMULL, PMULL2 */
- if (is_u) {
+ if (is_u || size == 0) {
unallocated_encoding(s);
return;
}
/* fall through */
case 0: /* SADDL, SADDL2, UADDL, UADDL2 */
case 2: /* SSUBL, SSUBL2, USUBL, USUBL2 */
- unsupported_encoding(s, insn);
- break;
case 5: /* SABAL, SABAL2, UABAL, UABAL2 */
case 7: /* SABDL, SABDL2, UABDL, UABDL2 */
case 8: /* SMLAL, SMLAL2, UMLAL, UMLAL2 */
--
1.8.5
next prev parent reply other threads:[~2014-02-16 18:27 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-02-16 18:21 [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR Peter Maydell
2014-02-16 18:21 ` [Qemu-devel] [PATCH 1/6] target-arm: A64: Add opcode comments to disas_simd_three_reg_diff Peter Maydell
2014-02-16 18:21 ` Peter Maydell [this message]
2014-02-16 18:21 ` [Qemu-devel] [PATCH 3/6] target-arm: A64: Implement the wide 3-reg-different operations Peter Maydell
2014-02-16 18:21 ` [Qemu-devel] [PATCH 4/6] target-arm: A64: Implement narrowing three-reg-diff operations Peter Maydell
2014-02-16 18:21 ` [Qemu-devel] [PATCH 5/6] target-arm: A64: Implement PMULL instruction Peter Maydell
2014-02-17 16:29 ` Richard Henderson
2014-02-17 22:59 ` Peter Maydell
2014-02-16 18:21 ` [Qemu-devel] [PATCH 6/6] target-arm: A64: Implement unprivileged load/store Peter Maydell
2014-02-17 16:30 ` [Qemu-devel] [PATCH 0/6] A64: finish Neon 3-reg-diff category, add LDTR/STTR Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1392574872-28725-3-git-send-email-peter.maydell@linaro.org \
--to=peter.maydell@linaro.org \
--cc=agraf@suse.de \
--cc=alex.bennee@linaro.org \
--cc=christoffer.dall@linaro.org \
--cc=claudio.fontana@linaro.org \
--cc=dmueller@suse.de \
--cc=kvmarm@lists.cs.columbia.edu \
--cc=laurent.desnogues@gmail.com \
--cc=matz@suse.de \
--cc=patches@linaro.org \
--cc=peter.crosthwaite@xilinx.com \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
--cc=will.newton@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).