[Qemu-devel] [PULL 11/30] target-arm: A64: Saturating and narrowing shift ops

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Peter Maydell <peter.maydell@linaro.org>
To: Anthony Liguori <aliguori@amazon.com>
Cc: "Blue Swirl" <blauwirbel@gmail.com>,
	"Andreas Färber" <andreas.faerber@web.de>,
	qemu-devel@nongnu.org, "Aurelien Jarno" <aurelien@aurel32.net>
Subject: [Qemu-devel] [PULL 11/30] target-arm: A64: Saturating and narrowing shift ops
Date: Mon, 17 Mar 2014 22:12:02 +0000	[thread overview]
Message-ID: <1395094341-19339-12-git-send-email-peter.maydell@linaro.org> (raw)
In-Reply-To: <1395094341-19339-1-git-send-email-peter.maydell@linaro.org>

From: Alex Bennée <alex.bennee@linaro.org>

This implements the remaining [US][Q][R]SHR[U][N][2] opcodes, which are
saturating and narrowing shift right operations. These are used in
things like libav. Note signed shifts can have an "unsigned" saturating
narrow operation which will floor negative values.

Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Message-id: 1394822294-14837-7-git-send-email-peter.maydell@linaro.org
[PMM: Added the scalar encodings, style tweaks]
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
 target-arm/translate-a64.c | 181 ++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 178 insertions(+), 3 deletions(-)

diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 4d40fb0..f8cae69 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -5907,6 +5907,94 @@ static void handle_scalar_simd_shli(DisasContext *s, bool insert,
     tcg_temp_free_i64(tcg_rd);
 }
 
+/* SQSHRN/SQSHRUN - Saturating (signed/unsigned) shift right with
+ * (signed/unsigned) narrowing */
+static void handle_vec_simd_sqshrn(DisasContext *s, bool is_scalar, bool is_q,
+                                   bool is_u_shift, bool is_u_narrow,
+                                   int immh, int immb, int opcode,
+                                   int rn, int rd)
+{
+    int immhb = immh << 3 | immb;
+    int size = 32 - clz32(immh) - 1;
+    int esize = 8 << size;
+    int shift = (2 * esize) - immhb;
+    int elements = is_scalar ? 1 : (64 / esize);
+    bool round = extract32(opcode, 0, 1);
+    TCGMemOp ldop = (size + 1) | (is_u_shift ? 0 : MO_SIGN);
+    TCGv_i64 tcg_rn, tcg_rd, tcg_round;
+    TCGv_i32 tcg_rd_narrowed;
+    TCGv_i64 tcg_final;
+
+    static NeonGenNarrowEnvFn * const signed_narrow_fns[4][2] = {
+        { gen_helper_neon_narrow_sat_s8,
+          gen_helper_neon_unarrow_sat8 },
+        { gen_helper_neon_narrow_sat_s16,
+          gen_helper_neon_unarrow_sat16 },
+        { gen_helper_neon_narrow_sat_s32,
+          gen_helper_neon_unarrow_sat32 },
+        { NULL, NULL },
+    };
+    static NeonGenNarrowEnvFn * const unsigned_narrow_fns[4] = {
+        gen_helper_neon_narrow_sat_u8,
+        gen_helper_neon_narrow_sat_u16,
+        gen_helper_neon_narrow_sat_u32,
+        NULL
+    };
+    NeonGenNarrowEnvFn *narrowfn;
+
+    int i;
+
+    assert(size < 4);
+
+    if (extract32(immh, 3, 1)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    if (is_u_shift) {
+        narrowfn = unsigned_narrow_fns[size];
+    } else {
+        narrowfn = signed_narrow_fns[size][is_u_narrow ? 1 : 0];
+    }
+
+    tcg_rn = tcg_temp_new_i64();
+    tcg_rd = tcg_temp_new_i64();
+    tcg_rd_narrowed = tcg_temp_new_i32();
+    tcg_final = tcg_const_i64(0);
+
+    if (round) {
+        uint64_t round_const = 1ULL << (shift - 1);
+        tcg_round = tcg_const_i64(round_const);
+    } else {
+        TCGV_UNUSED_I64(tcg_round);
+    }
+
+    for (i = 0; i < elements; i++) {
+        read_vec_element(s, tcg_rn, rn, i, ldop);
+        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
+                                false, is_u_shift, size+1, shift);
+        narrowfn(tcg_rd_narrowed, cpu_env, tcg_rd);
+        tcg_gen_extu_i32_i64(tcg_rd, tcg_rd_narrowed);
+        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
+    }
+
+    if (!is_q) {
+        clear_vec_high(s, rd);
+        write_vec_element(s, tcg_final, rd, 0, MO_64);
+    } else {
+        write_vec_element(s, tcg_final, rd, 1, MO_64);
+    }
+
+    if (round) {
+        tcg_temp_free_i64(tcg_round);
+    }
+    tcg_temp_free_i64(tcg_rn);
+    tcg_temp_free_i64(tcg_rd);
+    tcg_temp_free_i32(tcg_rd_narrowed);
+    tcg_temp_free_i64(tcg_final);
+    return;
+}
+
 /* Common vector code for handling integer to FP conversion */
 static void handle_simd_intfp_conv(DisasContext *s, int rd, int rn,
                                    int elements, int is_signed,
@@ -6013,6 +6101,11 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
     int immh = extract32(insn, 19, 4);
     bool is_u = extract32(insn, 29, 1);
 
+    if (immh == 0) {
+        unallocated_encoding(s);
+        return;
+    }
+
     switch (opcode) {
     case 0x00: /* SSHR / USHR */
     case 0x02: /* SSRA / USRA */
@@ -6027,6 +6120,20 @@ static void disas_simd_scalar_shift_imm(DisasContext *s, uint32_t insn)
         handle_simd_shift_intfp_conv(s, true, false, is_u, immh, immb,
                                      opcode, rn, rd);
         break;
+    case 0x10: /* SQSHRUN, SQSHRUN2 */
+    case 0x11: /* SQRSHRUN, SQRSHRUN2 */
+        if (!is_u) {
+            unallocated_encoding(s);
+            return;
+        }
+        handle_vec_simd_sqshrn(s, true, false, false, true,
+                               immh, immb, opcode, rn, rd);
+        break;
+    case 0x12: /* SQSHRN, SQSHRN2, UQSHRN */
+    case 0x13: /* SQRSHRN, SQRSHRN2, UQRSHRN, UQRSHRN2 */
+        handle_vec_simd_sqshrn(s, true, false, is_u, is_u,
+                               immh, immb, opcode, rn, rd);
+        break;
     default:
         unsupported_encoding(s, insn);
         break;
@@ -6985,6 +7092,63 @@ static void handle_vec_simd_wshli(DisasContext *s, bool is_q, bool is_u,
     }
 }
 
+/* SHRN/RSHRN - Shift right with narrowing (and potential rounding) */
+static void handle_vec_simd_shrn(DisasContext *s, bool is_q,
+                                 int immh, int immb, int opcode, int rn, int rd)
+{
+    int immhb = immh << 3 | immb;
+    int size = 32 - clz32(immh) - 1;
+    int dsize = 64;
+    int esize = 8 << size;
+    int elements = dsize/esize;
+    int shift = (2 * esize) - immhb;
+    bool round = extract32(opcode, 0, 1);
+    TCGv_i64 tcg_rn, tcg_rd, tcg_final;
+    TCGv_i64 tcg_round;
+    int i;
+
+    if (extract32(immh, 3, 1)) {
+        unallocated_encoding(s);
+        return;
+    }
+
+    tcg_rn = tcg_temp_new_i64();
+    tcg_rd = tcg_temp_new_i64();
+    tcg_final = tcg_temp_new_i64();
+    read_vec_element(s, tcg_final, rd, is_q ? 1 : 0, MO_64);
+
+    if (round) {
+        uint64_t round_const = 1ULL << (shift - 1);
+        tcg_round = tcg_const_i64(round_const);
+    } else {
+        TCGV_UNUSED_I64(tcg_round);
+    }
+
+    for (i = 0; i < elements; i++) {
+        read_vec_element(s, tcg_rn, rn, i, size+1);
+        handle_shri_with_rndacc(tcg_rd, tcg_rn, tcg_round,
+                                false, true, size+1, shift);
+
+        tcg_gen_deposit_i64(tcg_final, tcg_final, tcg_rd, esize * i, esize);
+    }
+
+    if (!is_q) {
+        clear_vec_high(s, rd);
+        write_vec_element(s, tcg_final, rd, 0, MO_64);
+    } else {
+        write_vec_element(s, tcg_final, rd, 1, MO_64);
+    }
+
+    if (round) {
+        tcg_temp_free_i64(tcg_round);
+    }
+    tcg_temp_free_i64(tcg_rn);
+    tcg_temp_free_i64(tcg_rd);
+    tcg_temp_free_i64(tcg_final);
+    return;
+}
+
+
 /* C3.6.14 AdvSIMD shift by immediate
  *  31  30   29 28         23 22  19 18  16 15    11  10 9    5 4    0
  * +---+---+---+-------------+------+------+--------+---+------+------+
@@ -7011,6 +7175,20 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
     case 0x0a: /* SHL / SLI */
         handle_vec_simd_shli(s, is_q, is_u, immh, immb, opcode, rn, rd);
         break;
+    case 0x10: /* SHRN */
+    case 0x11: /* RSHRN / SQRSHRUN */
+        if (is_u) {
+            handle_vec_simd_sqshrn(s, false, is_q, false, true, immh, immb,
+                                   opcode, rn, rd);
+        } else {
+            handle_vec_simd_shrn(s, is_q, immh, immb, opcode, rn, rd);
+        }
+        break;
+    case 0x12: /* SQSHRN / UQSHRN */
+    case 0x13: /* SQRSHRN / UQRSHRN */
+        handle_vec_simd_sqshrn(s, false, is_q, is_u, is_u, immh, immb,
+                               opcode, rn, rd);
+        break;
     case 0x14: /* SSHLL / USHLL */
         handle_vec_simd_wshli(s, is_q, is_u, immh, immb, opcode, rn, rd);
         break;
@@ -7022,9 +7200,6 @@ static void disas_simd_shift_imm(DisasContext *s, uint32_t insn)
         unsupported_encoding(s, insn);
         return;
     default:
-        /* We don't currently implement any of the Narrow or
-         * saturating shifts.
-         */
         unsupported_encoding(s, insn);
         return;
     }
-- 
1.9.0

next prev parent reply	other threads:[~2014-03-17 22:13 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-03-17 22:11 [Qemu-devel] [PULL for-2.0rc1 00/30] target-arm queue Peter Maydell
2014-03-17 22:11 ` [Qemu-devel] [PULL 01/30] vexpress: Set reset-cbar property for CPUs Peter Maydell
2014-03-17 22:11 ` [Qemu-devel] [PULL 02/30] realview-pbx-a9: " Peter Maydell
2014-03-17 22:11 ` [Qemu-devel] [PULL 03/30] exynos4210: Set reset-cbar property of Cortex-A9 CPUs Peter Maydell
2014-03-17 22:11 ` [Qemu-devel] [PULL 04/30] virt: Set reset-cbar on CPUs Peter Maydell
2014-03-17 22:11 ` [Qemu-devel] [PULL 05/30] target-arm: Add ARM_CP_IO notation to PMCR reginfo Peter Maydell
2014-03-17 22:11 ` [Qemu-devel] [PULL 06/30] target-arm: A64: Implement PMULL instruction Peter Maydell
2014-03-17 22:11 ` [Qemu-devel] [PULL 07/30] target-arm: A64: Fix bug in add_sub_ext handling of rn Peter Maydell
2014-03-17 22:11 ` [Qemu-devel] [PULL 08/30] target-arm: A64: Add last AdvSIMD Integer to FP ops Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 09/30] target-arm: A64: Add FSQRT to C3.6.17 (two misc) Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 10/30] target-arm: A64: Add remaining CLS/Z vector ops Peter Maydell
2014-03-17 22:12 ` Peter Maydell [this message]
2014-03-17 22:12 ` [Qemu-devel] [PULL 12/30] target-arm: A64: Implement SADDLP, UADDLP, SADALP, UADALP Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 13/30] target-arm: A64: Implement SHLL, SHLL2 Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 14/30] target-arm: A64: Implement FCVT[NMAPZ][SU] SIMD instructions Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 15/30] target-arm: A64: Implement FCVTN Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 16/30] target-arm: A64: Implement FCVTL Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 17/30] target-arm: A64: List unsupported shift-imm opcodes Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 18/30] target-arm: A64: Add FRECPX (reciprocal exponent) Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 19/30] target-arm: A64: Implement SRI Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 20/30] target-arm: A64: Implement FRINT* Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 21/30] exec-all.h: Increase MAX_OP_PER_INSTR for ARM A64 decoder Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 22/30] target-arm: A64: Handle saturating left shifts SQSHL, SQSHLU, UQSHL Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 23/30] target-arm: A64: Implement FCVTZS, FCVTZU in the shift-imm categories Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 24/30] softfloat: export squash_input_denormal functions Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 25/30] target-arm: A64: Implement AdvSIMD reciprocal estimate insns URECPE, FRECPE Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 26/30] target-arm: A64: Move handle_2misc_narrow function Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 27/30] target-arm: A64: Implement scalar saturating narrow ops Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 28/30] target-arm: A64: Implement FCVTXN Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 29/30] target-arm: A64: Add [UF]RSQRTE (reciprocal root estimate) Peter Maydell
2014-03-17 22:12 ` [Qemu-devel] [PULL 30/30] scripts/qemu-binfmt-conf.sh: Add AArch64 registration Peter Maydell
2014-03-18 16:41 ` [Qemu-devel] [PULL for-2.0rc1 00/30] target-arm queue Peter Maydell

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:4d40fb0 dfblob:f8cae69 )
 OR (
bs:"[Qemu-devel] [PULL 11/30] target-arm: A64: Saturating and narrowing shift ops" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1395094341-19339-12-git-send-email-peter.maydell@linaro.org \
    --to=peter.maydell@linaro.org \
    --cc=aliguori@amazon.com \
    --cc=andreas.faerber@web.de \
    --cc=aurelien@aurel32.net \
    --cc=blauwirbel@gmail.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).