From: Peter Maydell <peter.maydell@linaro.org>
To: qemu-devel@nongnu.org
Cc: "Peter Crosthwaite" <peter.crosthwaite@xilinx.com>,
patches@linaro.org, "Michael Matz" <matz@suse.de>,
"Alexander Graf" <agraf@suse.de>,
"Will Newton" <will.newton@linaro.org>,
"Dirk Mueller" <dmueller@suse.de>,
"Laurent Desnogues" <laurent.desnogues@gmail.com>,
"Alex Bennée" <alex.bennee@linaro.org>,
kvmarm@lists.cs.columbia.edu,
"Christoffer Dall" <christoffer.dall@linaro.org>,
"Richard Henderson" <rth@twiddle.net>
Subject: [Qemu-devel] [PATCH v2 07/25] target-arm: A64: Implement SADDLP, UADDLP, SADALP, UADALP
Date: Fri, 14 Mar 2014 18:37:56 +0000 [thread overview]
Message-ID: <1394822294-14837-8-git-send-email-peter.maydell@linaro.org> (raw)
In-Reply-To: <1394822294-14837-1-git-send-email-peter.maydell@linaro.org>
Implement the SADDLP, UADDLP, SADALP and UADALP instructions
in the SIMD 2-reg misc category.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target-arm/helper-a64.c | 61 +++++++++++++++++++++++++++++++++++++
target-arm/helper-a64.h | 4 +++
target-arm/translate-a64.c | 75 +++++++++++++++++++++++++++++++++++++++++++++-
3 files changed, 139 insertions(+), 1 deletion(-)
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index 8f53223..c31c45e 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -293,3 +293,64 @@ float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
}
return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}
+
+/* Pairwise long add: add pairs of adjacent elements into
+ * double-width elements in the result (eg _s8 is an 8x8->16 op)
+ */
+uint64_t HELPER(neon_addlp_s8)(uint64_t a)
+{
+ uint64_t nsignmask = 0x0080008000800080ULL;
+ uint64_t wsignmask = 0x8000800080008000ULL;
+ uint64_t elementmask = 0x00ff00ff00ff00ffULL;
+ uint64_t tmp1, tmp2;
+ uint64_t res, signres;
+
+ /* Extract odd elements, sign extend each to a 16 bit field */
+ tmp1 = a & elementmask;
+ tmp1 ^= nsignmask;
+ tmp1 |= wsignmask;
+ tmp1 = (tmp1 - nsignmask) ^ wsignmask;
+ /* Ditto for the even elements */
+ tmp2 = (a >> 8) & elementmask;
+ tmp2 ^= nsignmask;
+ tmp2 |= wsignmask;
+ tmp2 = (tmp2 - nsignmask) ^ wsignmask;
+
+ /* calculate the result by summing bits 0..14, 16..22, etc,
+ * and then adjusting the sign bits 15, 23, etc manually.
+ * This ensures the addition can't overflow the 16 bit field.
+ */
+ signres = (tmp1 ^ tmp2) & wsignmask;
+ res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
+ res ^= signres;
+
+ return res;
+}
+
+uint64_t HELPER(neon_addlp_u8)(uint64_t a)
+{
+ uint64_t tmp;
+
+ tmp = a & 0x00ff00ff00ff00ffULL;
+ tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
+ return tmp;
+}
+
+uint64_t HELPER(neon_addlp_s16)(uint64_t a)
+{
+ int32_t reslo, reshi;
+
+ reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
+ reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);
+
+ return (uint32_t)reslo | (((uint64_t)reshi) << 32);
+}
+
+uint64_t HELPER(neon_addlp_u16)(uint64_t a)
+{
+ uint64_t tmp;
+
+ tmp = a & 0x0000ffff0000ffffULL;
+ tmp += (a >> 16) & 0x0000ffff0000ffffULL;
+ return tmp;
+}
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index a113d22..88fc9fe 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -39,3 +39,7 @@ DEF_HELPER_FLAGS_3(recpsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(recpsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f32, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(rsqrtsf_f64, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
+DEF_HELPER_FLAGS_1(neon_addlp_s8, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_u8, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_s16, TCG_CALL_NO_RWG_SE, i64, i64)
+DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index f8cae69..4562fac 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -81,6 +81,7 @@ typedef void NeonGenNarrowEnvFn(TCGv_i32, TCGv_ptr, TCGv_i64);
typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
+typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
/* initialize TCG globals. */
void a64_translate_init(void)
@@ -8456,6 +8457,78 @@ static void handle_rev(DisasContext *s, int opcode, bool u,
}
}
+static void handle_2misc_pairwise(DisasContext *s, int opcode, bool u,
+ bool is_q, int size, int rn, int rd)
+{
+ /* Implement the pairwise operations from 2-misc:
+ * SADDLP, UADDLP, SADALP, UADALP.
+ * These all add pairs of elements in the input to produce a
+ * double-width result element in the output (possibly accumulating).
+ */
+ bool accum = (opcode == 0x6);
+ int maxpass = is_q ? 2 : 1;
+ int pass;
+ TCGv_i64 tcg_res[2];
+
+ if (size == 2) {
+ /* 32 + 32 -> 64 op */
+ TCGMemOp memop = size + (u ? 0 : MO_SIGN);
+
+ for (pass = 0; pass < maxpass; pass++) {
+ TCGv_i64 tcg_op1 = tcg_temp_new_i64();
+ TCGv_i64 tcg_op2 = tcg_temp_new_i64();
+
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op1, rn, pass * 2, memop);
+ read_vec_element(s, tcg_op2, rn, pass * 2 + 1, memop);
+ tcg_gen_add_i64(tcg_res[pass], tcg_op1, tcg_op2);
+ if (accum) {
+ read_vec_element(s, tcg_op1, rd, pass, MO_64);
+ tcg_gen_add_i64(tcg_res[pass], tcg_res[pass], tcg_op1);
+ }
+
+ tcg_temp_free_i64(tcg_op1);
+ tcg_temp_free_i64(tcg_op2);
+ }
+ } else {
+ for (pass = 0; pass < maxpass; pass++) {
+ TCGv_i64 tcg_op = tcg_temp_new_i64();
+ NeonGenOneOpFn *genfn;
+ static NeonGenOneOpFn * const fns[2][2] = {
+ { gen_helper_neon_addlp_s8, gen_helper_neon_addlp_u8 },
+ { gen_helper_neon_addlp_s16, gen_helper_neon_addlp_u16 },
+ };
+
+ genfn = fns[size][u];
+
+ tcg_res[pass] = tcg_temp_new_i64();
+
+ read_vec_element(s, tcg_op, rn, pass, MO_64);
+ genfn(tcg_res[pass], tcg_op);
+
+ if (accum) {
+ read_vec_element(s, tcg_op, rd, pass, MO_64);
+ if (size == 0) {
+ gen_helper_neon_addl_u16(tcg_res[pass],
+ tcg_res[pass], tcg_op);
+ } else {
+ gen_helper_neon_addl_u32(tcg_res[pass],
+ tcg_res[pass], tcg_op);
+ }
+ }
+ tcg_temp_free_i64(tcg_op);
+ }
+ }
+ if (!is_q) {
+ tcg_res[1] = tcg_const_i64(0);
+ }
+ for (pass = 0; pass < 2; pass++) {
+ write_vec_element(s, tcg_res[pass], rd, pass, MO_64);
+ tcg_temp_free_i64(tcg_res[pass]);
+ }
+}
+
/* C3.6.17 AdvSIMD two reg misc
* 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
* +---+---+---+-----------+------+-----------+--------+-----+------+------+
@@ -8510,7 +8583,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
- unsupported_encoding(s, insn);
+ handle_2misc_pairwise(s, opcode, u, is_q, size, rn, rd);
return;
case 0x13: /* SHLL, SHLL2 */
if (u == 0 || size == 3) {
--
1.9.0
next prev parent reply other threads:[~2014-03-14 18:38 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-03-14 18:37 [Qemu-devel] [PATCH v2 00/25] A64: Neon patches, sixth set Peter Maydell
2014-03-14 18:37 ` [Qemu-devel] [PATCH v2 01/25] target-arm: A64: Implement PMULL instruction Peter Maydell
2014-03-14 18:37 ` [Qemu-devel] [PATCH v2 02/25] target-arm: A64: Fix bug in add_sub_ext handling of rn Peter Maydell
2014-03-14 18:37 ` [Qemu-devel] [PATCH v2 03/25] target-arm: A64: Add last AdvSIMD Integer to FP ops Peter Maydell
2014-03-14 18:37 ` [Qemu-devel] [PATCH v2 04/25] target-arm: A64: Add FSQRT to C3.6.17 (two misc) Peter Maydell
2014-03-14 18:37 ` [Qemu-devel] [PATCH v2 05/25] target-arm: A64: Add remaining CLS/Z vector ops Peter Maydell
2014-03-14 18:37 ` [Qemu-devel] [PATCH v2 06/25] target-arm: A64: Saturating and narrowing shift ops Peter Maydell
2014-03-14 18:37 ` Peter Maydell [this message]
2014-03-14 18:37 ` [Qemu-devel] [PATCH v2 08/25] target-arm: A64: Implement SHLL, SHLL2 Peter Maydell
2014-03-14 18:37 ` [Qemu-devel] [PATCH v2 09/25] target-arm: A64: Implement FCVT[NMAPZ][SU] SIMD instructions Peter Maydell
2014-03-14 18:37 ` [Qemu-devel] [PATCH v2 10/25] target-arm: A64: Implement FCVTN Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 11/25] target-arm: A64: Implement FCVTL Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 12/25] target-arm: A64: List unsupported shift-imm opcodes Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 13/25] target-arm: A64: Add FRECPX (reciprocal exponent) Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 14/25] target-arm: A64: Implement SRI Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 15/25] target-arm: A64: Implement FRINT* Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 16/25] exec-all.h: Increase MAX_OP_PER_INSTR for ARM A64 decoder Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 17/25] target-arm: A64: Handle saturating left shifts SQSHL, SQSHLU, UQSHL Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 18/25] target-arm: A64: Implement FCVTZS, FCVTZU in the shift-imm categories Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 19/25] softfloat: export squash_input_denormal functions Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 20/25] target-arm: A64: Implement AdvSIMD reciprocal estimate insns URECPE, FRECPE Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 21/25] target-arm: A64: Move handle_2misc_narrow function Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 22/25] target-arm: A64: Implement scalar saturating narrow ops Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 23/25] target-arm: A64: Implement FCVTXN Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 24/25] target-arm: A64: Add [UF]RSQRTE (reciprocal root estimate) Peter Maydell
2014-03-14 18:38 ` [Qemu-devel] [PATCH v2 25/25] scripts/qemu-binfmt-conf.sh: Add AArch64 registration Peter Maydell
2014-03-14 23:21 ` [Qemu-devel] [PATCH v2 00/25] A64: Neon patches, sixth set Richard Henderson
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1394822294-14837-8-git-send-email-peter.maydell@linaro.org \
--to=peter.maydell@linaro.org \
--cc=agraf@suse.de \
--cc=alex.bennee@linaro.org \
--cc=christoffer.dall@linaro.org \
--cc=dmueller@suse.de \
--cc=kvmarm@lists.cs.columbia.edu \
--cc=laurent.desnogues@gmail.com \
--cc=matz@suse.de \
--cc=patches@linaro.org \
--cc=peter.crosthwaite@xilinx.com \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
--cc=will.newton@linaro.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).