* [Qemu-devel] [PATCH for-2.0 v2 0/2] A64: Implement last four Neon insns
@ 2014-03-18 23:01 Peter Maydell
2014-03-18 23:01 ` [Qemu-devel] [PATCH for-2.0 v2 1/2] target-arm: A64: Add saturating int ops (SQNEG/SQABS) Peter Maydell
2014-03-18 23:01 ` [Qemu-devel] [PATCH for-2.0 v2 2/2] target-arm: A64: Add saturating accumulate ops (USQADD/SUQADD) Peter Maydell
0 siblings, 2 replies; 3+ messages in thread
From: Peter Maydell @ 2014-03-18 23:01 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Crosthwaite, patches, Michael Matz, Alexander Graf,
Will Newton, Dirk Mueller, Laurent Desnogues, Alex Bennée,
kvmarm, Christoffer Dall, Richard Henderson
These two patches implement the last four A64 Neon instructions:
SQNEG, SQABS, USQADD, SUQADD
meaning that we have complete coverage of the userspace instruction
set, with the exception of the optional crypto and CRC instructions.
As with the previous set, these are safe changes so I'm planning to
get them into 2.0 even though they're not strictly speaking
bugfixes. (Specifically, expect to see these plus a few other
bugfixes in a pullreq tomorrow aiming at rc1.)
Changes v1->v2: use extract32/deposit32 in the USATACC and
SSATACC macros rather than raw bit-twiddling.
thanks
-- PMM
Alex Bennée (2):
target-arm: A64: Add saturating int ops (SQNEG/SQABS)
target-arm: A64: Add saturating accumulate ops (USQADD/SUQADD)
target-arm/helper.h | 34 ++++++---
target-arm/neon_helper.c | 187 +++++++++++++++++++++++++++++++++++++++++++++
target-arm/translate-a64.c | 160 +++++++++++++++++++++++++++++++++++---
3 files changed, 359 insertions(+), 22 deletions(-)
--
1.9.0
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Qemu-devel] [PATCH for-2.0 v2 1/2] target-arm: A64: Add saturating int ops (SQNEG/SQABS)
2014-03-18 23:01 [Qemu-devel] [PATCH for-2.0 v2 0/2] A64: Implement last four Neon insns Peter Maydell
@ 2014-03-18 23:01 ` Peter Maydell
2014-03-18 23:01 ` [Qemu-devel] [PATCH for-2.0 v2 2/2] target-arm: A64: Add saturating accumulate ops (USQADD/SUQADD) Peter Maydell
1 sibling, 0 replies; 3+ messages in thread
From: Peter Maydell @ 2014-03-18 23:01 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Crosthwaite, patches, Michael Matz, Alexander Graf,
Will Newton, Dirk Mueller, Laurent Desnogues, Alex Bennée,
kvmarm, Christoffer Dall, Richard Henderson
From: Alex Bennée <alex.bennee@linaro.org>
This mostly re-uses the existing NEON helpers with an additional two for
the 64 bit case. I also took the opportunity to add TCG_CALL_NO_RWG
options to the helpers as they don't modify globals (saturation flags
are in the CPU Environment).
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
target-arm/helper.h | 14 +++++++------
target-arm/neon_helper.c | 22 ++++++++++++++++++++
target-arm/translate-a64.c | 51 ++++++++++++++++++++++++++++++++++++++++------
3 files changed, 75 insertions(+), 12 deletions(-)
diff --git a/target-arm/helper.h b/target-arm/helper.h
index a3d6f32..b006fd5 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -375,12 +375,14 @@ DEF_HELPER_2(neon_mull_s16, i64, i32, i32)
DEF_HELPER_1(neon_negl_u16, i64, i64)
DEF_HELPER_1(neon_negl_u32, i64, i64)
-DEF_HELPER_2(neon_qabs_s8, i32, env, i32)
-DEF_HELPER_2(neon_qabs_s16, i32, env, i32)
-DEF_HELPER_2(neon_qabs_s32, i32, env, i32)
-DEF_HELPER_2(neon_qneg_s8, i32, env, i32)
-DEF_HELPER_2(neon_qneg_s16, i32, env, i32)
-DEF_HELPER_2(neon_qneg_s32, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qabs_s8, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qabs_s16, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qabs_s32, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qabs_s64, TCG_CALL_NO_RWG, i64, env, i64)
+DEF_HELPER_FLAGS_2(neon_qneg_s8, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qneg_s16, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qneg_s32, TCG_CALL_NO_RWG, i32, env, i32)
+DEF_HELPER_FLAGS_2(neon_qneg_s64, TCG_CALL_NO_RWG, i64, env, i64)
DEF_HELPER_3(neon_abd_f32, i32, i32, i32, ptr)
DEF_HELPER_3(neon_ceq_f32, i32, i32, i32, ptr)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 13752ba..e23f224 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -1776,6 +1776,28 @@ uint32_t HELPER(neon_qneg_s32)(CPUARMState *env, uint32_t x)
return x;
}
+uint64_t HELPER(neon_qabs_s64)(CPUARMState *env, uint64_t x)
+{ /* Saturating absolute value of x, which is interpreted as a signed 64-bit integer. */
+ if (x == SIGNBIT64) { /* NOTE(review): SIGNBIT64 is defined outside this hunk; presumably only bit 63 set, i.e. the one value whose magnitude is unrepresentable - confirm */
+ SET_QC(); /* presumably records saturation via env (macro defined elsewhere) - confirm */
+ x = ~SIGNBIT64; /* clamp to the bitwise complement of SIGNBIT64 */
+ } else if ((int64_t)x < 0) {
+ x = -x; /* any other negative input: plain negation yields the magnitude */
+ }
+ return x; /* non-negative inputs are returned unchanged */
+}
+
+uint64_t HELPER(neon_qneg_s64)(CPUARMState *env, uint64_t x)
+{ /* Saturating negation of a 64-bit value. */
+ if (x == SIGNBIT64) { /* NOTE(review): SIGNBIT64 is defined outside this hunk; presumably only bit 63 set, the one input whose negation does not fit - confirm */
+ SET_QC(); /* presumably records saturation via env (macro defined elsewhere) - confirm */
+ x = ~SIGNBIT64; /* clamp to the bitwise complement of SIGNBIT64 */
+ } else {
+ x = -x; /* every other input negates without overflow (unsigned arithmetic, so well defined) */
+ }
+ return x;
+}
+
/* NEON Float helpers. */
uint32_t HELPER(neon_abd_f32)(uint32_t a, uint32_t b, void *fpstp)
{
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index befffac..18659d7 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -73,6 +73,7 @@ typedef struct AArch64DecodeTable {
} AArch64DecodeTable;
/* Function prototype for gen_ functions for calling Neon helpers */
+typedef void NeonGenOneOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32);
typedef void NeonGenTwoOpFn(TCGv_i32, TCGv_i32, TCGv_i32);
typedef void NeonGenTwoOpEnvFn(TCGv_i32, TCGv_ptr, TCGv_i32, TCGv_i32);
typedef void NeonGenTwo64OpFn(TCGv_i64, TCGv_i64, TCGv_i64);
@@ -6942,6 +6943,13 @@ static void handle_2misc_64(DisasContext *s, int opcode, bool u,
*/
tcg_gen_not_i64(tcg_rd, tcg_rn);
break;
+ case 0x7: /* SQABS, SQNEG */
+ if (u) {
+ gen_helper_neon_qneg_s64(tcg_rd, cpu_env, tcg_rn);
+ } else {
+ gen_helper_neon_qabs_s64(tcg_rd, cpu_env, tcg_rn);
+ }
+ break;
case 0xa: /* CMLT */
/* 64 bit integer comparison against zero, result is
* test ? (2^64 - 1) : 0. We implement via setcond(!test) and
@@ -7332,6 +7340,8 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
TCGv_ptr tcg_fpstatus;
switch (opcode) {
+ case 0x7: /* SQABS / SQNEG */
+ break;
case 0xa: /* CMLT */
if (u) {
unallocated_encoding(s);
@@ -7441,11 +7451,25 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
write_fp_dreg(s, rd, tcg_rd);
tcg_temp_free_i64(tcg_rd);
tcg_temp_free_i64(tcg_rn);
- } else if (size == 2) {
- TCGv_i32 tcg_rn = read_fp_sreg(s, rn);
+ } else {
+ TCGv_i32 tcg_rn = tcg_temp_new_i32();
TCGv_i32 tcg_rd = tcg_temp_new_i32();
+ read_vec_element_i32(s, tcg_rn, rn, 0, size);
+
switch (opcode) {
+ case 0x7: /* SQABS, SQNEG */
+ {
+ NeonGenOneOpEnvFn *genfn;
+ static NeonGenOneOpEnvFn * const fns[3][2] = {
+ { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
+ { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
+ { gen_helper_neon_qabs_s32, gen_helper_neon_qneg_s32 },
+ };
+ genfn = fns[size][u];
+ genfn(tcg_rd, cpu_env, tcg_rn);
+ break;
+ }
case 0x1a: /* FCVTNS */
case 0x1b: /* FCVTMS */
case 0x1c: /* FCVTAS */
@@ -7475,8 +7499,6 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
write_fp_sreg(s, rd, tcg_rd);
tcg_temp_free_i32(tcg_rd);
tcg_temp_free_i32(tcg_rn);
- } else {
- g_assert_not_reached();
}
if (is_fcvt) {
@@ -9177,8 +9199,7 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
unallocated_encoding(s);
return;
}
- unsupported_encoding(s, insn);
- return;
+ break;
case 0xc ... 0xf:
case 0x16 ... 0x1d:
case 0x1f:
@@ -9389,6 +9410,13 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
gen_helper_cls32(tcg_res, tcg_op);
}
break;
+ case 0x7: /* SQABS, SQNEG */
+ if (u) {
+ gen_helper_neon_qneg_s32(tcg_res, cpu_env, tcg_op);
+ } else {
+ gen_helper_neon_qabs_s32(tcg_res, cpu_env, tcg_op);
+ }
+ break;
case 0xb: /* ABS, NEG */
if (u) {
tcg_gen_neg_i32(tcg_res, tcg_op);
@@ -9463,6 +9491,17 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
gen_helper_neon_cnt_u8(tcg_res, tcg_op);
}
break;
+ case 0x7: /* SQABS, SQNEG */
+ {
+ NeonGenOneOpEnvFn *genfn;
+ static NeonGenOneOpEnvFn * const fns[2][2] = {
+ { gen_helper_neon_qabs_s8, gen_helper_neon_qneg_s8 },
+ { gen_helper_neon_qabs_s16, gen_helper_neon_qneg_s16 },
+ };
+ genfn = fns[size][u];
+ genfn(tcg_res, cpu_env, tcg_op);
+ break;
+ }
case 0x8: /* CMGT, CMGE */
case 0x9: /* CMEQ, CMLE */
case 0xa: /* CMLT */
--
1.9.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [Qemu-devel] [PATCH for-2.0 v2 2/2] target-arm: A64: Add saturating accumulate ops (USQADD/SUQADD)
2014-03-18 23:01 [Qemu-devel] [PATCH for-2.0 v2 0/2] A64: Implement last four Neon insns Peter Maydell
2014-03-18 23:01 ` [Qemu-devel] [PATCH for-2.0 v2 1/2] target-arm: A64: Add saturating int ops (SQNEG/SQABS) Peter Maydell
@ 2014-03-18 23:01 ` Peter Maydell
1 sibling, 0 replies; 3+ messages in thread
From: Peter Maydell @ 2014-03-18 23:01 UTC (permalink / raw)
To: qemu-devel
Cc: Peter Crosthwaite, patches, Michael Matz, Alexander Graf,
Will Newton, Dirk Mueller, Laurent Desnogues, Alex Bennée,
kvmarm, Christoffer Dall, Richard Henderson
From: Alex Bennée <alex.bennee@linaro.org>
Add the saturating accumulate operations USQADD and SUQADD
to the A64 instruction set. This completes coverage of A64 Neon.
These operations (which are unsigned + signed -> signed and
signed + unsigned -> unsigned) don't exist in the A32/T32
instruction set, so require a complete new set of helper functions.
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
---
target-arm/helper.h | 20 ++++--
target-arm/neon_helper.c | 165 +++++++++++++++++++++++++++++++++++++++++++++
target-arm/translate-a64.c | 109 ++++++++++++++++++++++++++++--
3 files changed, 284 insertions(+), 10 deletions(-)
diff --git a/target-arm/helper.h b/target-arm/helper.h
index b006fd5..366c1b3 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -186,12 +186,20 @@ DEF_HELPER_FLAGS_2(rints, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(rintd, TCG_CALL_NO_RWG, f64, f64, ptr)
/* neon_helper.c */
-DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qadd_s8, i32, env, i32, i32)
-DEF_HELPER_3(neon_qadd_u16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qadd_s16, i32, env, i32, i32)
-DEF_HELPER_3(neon_qadd_u32, i32, env, i32, i32)
-DEF_HELPER_3(neon_qadd_s32, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_qadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_qadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_qadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_qadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_qadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_qadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_uqadd_s8, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_uqadd_s16, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_uqadd_s32, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_uqadd_s64, TCG_CALL_NO_RWG, i64, env, i64, i64)
+DEF_HELPER_FLAGS_3(neon_sqadd_u8, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_sqadd_u16, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_sqadd_u32, TCG_CALL_NO_RWG, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(neon_sqadd_u64, TCG_CALL_NO_RWG, i64, env, i64, i64)
DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32)
DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32)
DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index e23f224..8d6f9a9 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -236,6 +236,171 @@ uint64_t HELPER(neon_qadd_s64)(CPUARMState *env, uint64_t src1, uint64_t src2)
return res;
}
+/* Unsigned saturating accumulate of signed value
+ *
+ * Op1/Rn is treated as signed
+ * Op2/Rd is treated as unsigned
+ *
+ * Explicit casting is used to ensure the correct sign extension of
+ * inputs. The result is treated as an unsigned value and saturated as such.
+ *
+ * We use a macro for the 8/16 bit cases which expects signed integers of va,
+ * vb, and vr for interim calculation and an unsigned 32 bit result value r.
+ */
+
+#define USATACC(bits, shift) /* one lane of width 'bits' at bit offset 'shift'; uses the caller's a, b, va, vb, vr, r and env */ \
+ do { \
+ va = sextract32(a, shift, bits); /* lane of a, taken as signed (Op1/Rn) */ \
+ vb = extract32(b, shift, bits); /* lane of b, taken as unsigned (Op2/Rd) */ \
+ vr = va + vb; /* caller declares va/vb/vr wider than 'bits' so this sum is exact */ \
+ if (vr > UINT##bits##_MAX) { /* above the unsigned range of the lane */ \
+ SET_QC(); \
+ vr = UINT##bits##_MAX; \
+ } else if (vr < 0) { /* below zero: clamp to the unsigned minimum */ \
+ SET_QC(); \
+ vr = 0; \
+ } \
+ r = deposit32(r, shift, bits, vr); /* place the clamped lane into the accumulated result */ \
+ } while (0)
+
+uint32_t HELPER(neon_uqadd_s8)(CPUARMState *env, uint32_t a, uint32_t b)
+{ /* Four packed 8-bit lanes: signed lane of a plus unsigned lane of b, each clamped to 0..UINT8_MAX by USATACC. */
+ int16_t va, vb, vr; /* wider than a lane so USATACC's sum cannot wrap before the range check */
+ uint32_t r = 0; /* result word, filled lane by lane */
+
+ USATACC(8, 0); /* bits [7:0] */
+ USATACC(8, 8); /* bits [15:8] */
+ USATACC(8, 16); /* bits [23:16] */
+ USATACC(8, 24); /* bits [31:24] */
+ return r;
+}
+
+uint32_t HELPER(neon_uqadd_s16)(CPUARMState *env, uint32_t a, uint32_t b)
+{ /* Two packed 16-bit lanes: signed lane of a plus unsigned lane of b, each clamped to 0..UINT16_MAX by USATACC. */
+ int32_t va, vb, vr; /* wider than a lane so USATACC's sum cannot wrap before the range check */
+ uint32_t r = 0; /* 32-bit accumulator: same width as the deposit32() operand and the return type (was uint64_t) */
+
+ USATACC(16, 0); /* bits [15:0] */
+ USATACC(16, 16); /* bits [31:16] */
+ return r;
+}
+
+#undef USATACC
+
+uint32_t HELPER(neon_uqadd_s32)(CPUARMState *env, uint32_t a, uint32_t b)
+{ /* Single 32-bit lane: signed a plus unsigned b, clamped to 0..UINT32_MAX. */
+ int64_t va = (int32_t)a; /* sign-extend a to 64 bits */
+ int64_t vb = (uint32_t)b; /* zero-extend b to 64 bits */
+ int64_t vr = va + vb; /* exact: both operands fit in 33 bits */
+ if (vr > UINT32_MAX) { /* too large for an unsigned 32-bit result */
+ SET_QC();
+ vr = UINT32_MAX;
+ } else if (vr < 0) { /* negative: clamp to the unsigned minimum */
+ SET_QC();
+ vr = 0;
+ }
+ return vr; /* now within 0..UINT32_MAX, so the narrowing is lossless */
+}
+
+uint64_t HELPER(neon_uqadd_s64)(CPUARMState *env, uint64_t a, uint64_t b)
+{ /* 64-bit lane: a is treated as signed, b as unsigned, result as unsigned with saturation. */
+ uint64_t res;
+ res = a + b; /* wrapping 64-bit add; no wider type is available, so overflow is inferred afterwards */
+ /* We only need to look at the pattern of SIGN bits to detect
+ * +ve/-ve saturation (assumes SIGNBIT64, defined elsewhere, is bit 63 only)
+ */
+ if (~a & b & ~res & SIGNBIT64) { /* a >= 0, b has its top bit set, yet the sum's top bit is clear: wrapped past the maximum */
+ SET_QC();
+ res = UINT64_MAX;
+ } else if (a & ~b & res & SIGNBIT64) { /* a < 0, b's top bit clear, yet the sum's top bit is set: went below zero */
+ SET_QC();
+ res = 0;
+ }
+ return res;
+}
+
+/* Signed saturating accumulate of unsigned value
+ *
+ * Op1/Rn is treated as unsigned
+ * Op2/Rd is treated as signed
+ *
+ * The result is treated as a signed value and saturated as such
+ *
+ * We use a macro for the 8/16 bit cases which expects signed integers of va,
+ * vb, and vr for interim calculation and an unsigned 32 bit result value r.
+ */
+
+#define SSATACC(bits, shift) /* one lane of width 'bits' at bit offset 'shift'; uses the caller's a, b, va, vb, vr, r and env */ \
+ do { \
+ va = extract32(a, shift, bits); /* lane of a, taken as unsigned (Op1/Rn) */ \
+ vb = sextract32(b, shift, bits); /* lane of b, taken as signed (Op2/Rd) */ \
+ vr = va + vb; /* caller declares va/vb/vr wider than 'bits' so this sum is exact */ \
+ if (vr > INT##bits##_MAX) { /* above the signed range of the lane */ \
+ SET_QC(); \
+ vr = INT##bits##_MAX; \
+ } else if (vr < INT##bits##_MIN) { /* below the signed range of the lane */ \
+ SET_QC(); \
+ vr = INT##bits##_MIN; \
+ } \
+ r = deposit32(r, shift, bits, vr); /* place the clamped lane into the accumulated result */ \
+ } while (0)
+
+uint32_t HELPER(neon_sqadd_u8)(CPUARMState *env, uint32_t a, uint32_t b)
+{ /* Four packed 8-bit lanes: unsigned lane of a plus signed lane of b, each clamped to INT8_MIN..INT8_MAX by SSATACC. */
+ int16_t va, vb, vr; /* wider than a lane so SSATACC's sum cannot wrap before the range check */
+ uint32_t r = 0; /* result word, filled lane by lane */
+
+ SSATACC(8, 0); /* bits [7:0] */
+ SSATACC(8, 8); /* bits [15:8] */
+ SSATACC(8, 16); /* bits [23:16] */
+ SSATACC(8, 24); /* bits [31:24] */
+ return r;
+}
+
+uint32_t HELPER(neon_sqadd_u16)(CPUARMState *env, uint32_t a, uint32_t b)
+{ /* Two packed 16-bit lanes: unsigned lane of a plus signed lane of b, each clamped to INT16_MIN..INT16_MAX by SSATACC. */
+ int32_t va, vb, vr; /* wider than a lane so SSATACC's sum cannot wrap before the range check */
+ uint32_t r = 0; /* result word, filled lane by lane */
+
+ SSATACC(16, 0); /* bits [15:0] */
+ SSATACC(16, 16); /* bits [31:16] */
+
+ return r;
+}
+
+#undef SSATACC
+
+uint32_t HELPER(neon_sqadd_u32)(CPUARMState *env, uint32_t a, uint32_t b)
+{ /* Single 32-bit lane: unsigned a plus signed b, clamped to INT32_MIN..INT32_MAX. */
+ int64_t res;
+ int64_t op1 = (uint32_t)a; /* zero-extend a to 64 bits */
+ int64_t op2 = (int32_t)b; /* sign-extend b to 64 bits */
+ res = op1 + op2; /* exact: both operands fit in 33 bits */
+ if (res > INT32_MAX) { /* too large for a signed 32-bit result */
+ SET_QC();
+ res = INT32_MAX;
+ } else if (res < INT32_MIN) { /* op1 >= 0 and op2 >= INT32_MIN, so this branch looks unreachable; kept as in the original */
+ SET_QC();
+ res = INT32_MIN;
+ }
+ return res; /* low 32 bits carry the two's-complement result */
+}
+
+uint64_t HELPER(neon_sqadd_u64)(CPUARMState *env, uint64_t a, uint64_t b)
+{ /* 64-bit lane: a is treated as unsigned, b as signed, result as signed; only upward saturation is handled. */
+ uint64_t res;
+ res = a + b; /* wrapping 64-bit add; overflow is inferred from the top bits afterwards */
+ /* We only need to look at the pattern of SIGN bits to detect an overflow (assumes SIGNBIT64, defined elsewhere, is bit 63 only) */
+ if (((a & res) /* a's top bit and the sum's top bit both set */
+ | (~b & res) /* b non-negative but the sum's top bit set */
+ | (a & ~b)) & SIGNBIT64) { /* a's top bit set and b non-negative */
+ SET_QC();
+ res = INT64_MAX; /* clamp to the largest signed value */
+ }
+ return res;
+}
+
+
#define NEON_USAT(dest, src1, src2, type) do { \
uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \
if (tmp != (type)tmp) { \
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 18659d7..9f06450 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -7321,6 +7321,101 @@ static void handle_2misc_narrow(DisasContext *s, bool scalar,
}
}
+/* Remaining saturating accumulating ops: emit USQADD (is_u) or SUQADD for register rn accumulated into rd; size 3 selects the 64-bit path, is_scalar the single-element form, is_q four 32-bit passes instead of two. */
+static void handle_2misc_satacc(DisasContext *s, bool is_scalar, bool is_u,
+ bool is_q, int size, int rn, int rd)
+{
+ bool is_double = (size == 3); /* 64-bit elements go through the i64 helpers */
+
+ if (is_double) {
+ TCGv_i64 tcg_rn = tcg_temp_new_i64();
+ TCGv_i64 tcg_rd = tcg_temp_new_i64();
+ int pass;
+
+ for (pass = 0; pass < (is_scalar ? 1 : 2); pass++) { /* one element for scalar, two for vector; is_q is not consulted on this path */
+ read_vec_element(s, tcg_rn, rn, pass, MO_64);
+ read_vec_element(s, tcg_rd, rd, pass, MO_64); /* rd is an input as well as the destination */
+
+ if (is_u) { /* USQADD */
+ gen_helper_neon_uqadd_s64(tcg_rd, cpu_env, tcg_rn, tcg_rd); /* helper args: a = Rn, b = Rd */
+ } else { /* SUQADD */
+ gen_helper_neon_sqadd_u64(tcg_rd, cpu_env, tcg_rn, tcg_rd); /* helper args: a = Rn, b = Rd */
+ }
+ write_vec_element(s, tcg_rd, rd, pass, MO_64);
+ }
+ if (is_scalar) {
+ clear_vec_high(s, rd); /* presumably zeroes the upper half of rd (helper defined elsewhere) - confirm */
+ }
+
+ tcg_temp_free_i64(tcg_rd);
+ tcg_temp_free_i64(tcg_rn);
+ } else {
+ TCGv_i32 tcg_rn = tcg_temp_new_i32();
+ TCGv_i32 tcg_rd = tcg_temp_new_i32();
+ int pass, maxpasses;
+
+ if (is_scalar) {
+ maxpasses = 1;
+ } else {
+ maxpasses = is_q ? 4 : 2; /* vector form is processed in 32-bit chunks */
+ }
+
+ for (pass = 0; pass < maxpasses; pass++) {
+ if (is_scalar) {
+ read_vec_element_i32(s, tcg_rn, rn, pass, size); /* scalar: read a single element of the requested size */
+ read_vec_element_i32(s, tcg_rd, rd, pass, size);
+ } else {
+ read_vec_element_i32(s, tcg_rn, rn, pass, MO_32); /* vector: read a whole 32-bit chunk; the helper handles the packed lanes */
+ read_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
+ }
+
+ if (is_u) { /* USQADD */
+ switch (size) {
+ case 0:
+ gen_helper_neon_uqadd_s8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ break;
+ case 1:
+ gen_helper_neon_uqadd_s16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ break;
+ case 2:
+ gen_helper_neon_uqadd_s32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ break;
+ default:
+ g_assert_not_reached(); /* size 3 was routed to the is_double branch */
+ }
+ } else { /* SUQADD */
+ switch (size) {
+ case 0:
+ gen_helper_neon_sqadd_u8(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ break;
+ case 1:
+ gen_helper_neon_sqadd_u16(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ break;
+ case 2:
+ gen_helper_neon_sqadd_u32(tcg_rd, cpu_env, tcg_rn, tcg_rd);
+ break;
+ default:
+ g_assert_not_reached(); /* size 3 was routed to the is_double branch */
+ }
+ }
+
+ if (is_scalar) {
+ TCGv_i64 tcg_zero = tcg_const_i64(0);
+ write_vec_element(s, tcg_zero, rd, 0, MO_64); /* zero the low 64 bits of rd before the 32-bit result is stored */
+ tcg_temp_free_i64(tcg_zero);
+ }
+ write_vec_element_i32(s, tcg_rd, rd, pass, MO_32);
+ }
+
+ if (!is_q) { /* also taken for scalar: the scalar caller in this patch passes is_q = false */
+ clear_vec_high(s, rd); /* presumably zeroes the upper half of rd (helper defined elsewhere) - confirm */
+ }
+
+ tcg_temp_free_i32(tcg_rd);
+ tcg_temp_free_i32(tcg_rn);
+ }
+}
+
/* C3.6.12 AdvSIMD scalar two reg misc
* 31 30 29 28 24 23 22 21 17 16 12 11 10 9 5 4 0
* +-----+---+-----------+------+-----------+--------+-----+------+------+
@@ -7340,6 +7435,9 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
TCGv_ptr tcg_fpstatus;
switch (opcode) {
+ case 0x3: /* USQADD / SUQADD*/
+ handle_2misc_satacc(s, true, u, false, size, rn, rd);
+ return;
case 0x7: /* SQABS / SQNEG */
break;
case 0xa: /* CMLT */
@@ -7427,10 +7525,7 @@ static void disas_simd_scalar_two_reg_misc(DisasContext *s, uint32_t insn)
}
break;
default:
- /* Other categories of encoding in this class:
- * + SUQADD/USQADD/SQABS/SQNEG : size 8, 16, 32 or 64
- */
- unsupported_encoding(s, insn);
+ unallocated_encoding(s);
return;
}
@@ -9194,6 +9289,12 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
}
break;
case 0x3: /* SUQADD, USQADD */
+ if (size == 3 && !is_q) {
+ unallocated_encoding(s);
+ return;
+ }
+ handle_2misc_satacc(s, false, u, is_q, size, rn, rd);
+ return;
case 0x7: /* SQABS, SQNEG */
if (size == 3 && !is_q) {
unallocated_encoding(s);
--
1.9.0
^ permalink raw reply related [flat|nested] 3+ messages in thread
end of thread, other threads:[~2014-03-18 23:26 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-03-18 23:01 [Qemu-devel] [PATCH for-2.0 v2 0/2] A64: Implement last four Neon insns Peter Maydell
2014-03-18 23:01 ` [Qemu-devel] [PATCH for-2.0 v2 1/2] target-arm: A64: Add saturating int ops (SQNEG/SQABS) Peter Maydell
2014-03-18 23:01 ` [Qemu-devel] [PATCH for-2.0 v2 2/2] target-arm: A64: Add saturating accumulate ops (USQADD/SUQADD) Peter Maydell
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).