From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from [140.186.70.92] (port=56407 helo=eggs.gnu.org) by lists.gnu.org with esmtp (Exim 4.43) id 1PlRnq-00030k-Na for qemu-devel@nongnu.org; Fri, 04 Feb 2011 14:58:47 -0500 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1PlRnp-0002sN-AQ for qemu-devel@nongnu.org; Fri, 04 Feb 2011 14:58:46 -0500 Received: from hall.aurel32.net ([88.191.126.93]:33681) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1PlRno-0002sG-VV for qemu-devel@nongnu.org; Fri, 04 Feb 2011 14:58:45 -0500 Date: Fri, 4 Feb 2011 20:58:48 +0100 From: Aurelien Jarno Subject: Re: [Qemu-devel] [PATCH v3] Set the right overflow bit for neon 32 and 64 bit saturating add/sub. Message-ID: <20110204195848.GD10125@volta.aurel32.net> References: <4D4C0A8F.2080203@st.com> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-15 Content-Disposition: inline In-Reply-To: <4D4C0A8F.2080203@st.com> List-Id: qemu-devel.nongnu.org List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Christophe Lyon Cc: "qemu-devel@nongnu.org" On Fri, Feb 04, 2011 at 03:17:51PM +0100, Christophe Lyon wrote: > > Signed-off-by: Christophe Lyon > --- > target-arm/helpers.h | 12 ++++-- > target-arm/neon_helper.c | 89 ++++++++++++++++++++++++++++++++++++++++++++++ > target-arm/op_helper.c | 49 ------------------------- > target-arm/translate.c | 18 ++++----- > 4 files changed, 105 insertions(+), 63 deletions(-) Thanks, applied. > diff --git a/target-arm/helpers.h b/target-arm/helpers.h > index 8cc6a44..4d0de00 100644 > --- a/target-arm/helpers.h > +++ b/target-arm/helpers.h > @@ -137,10 +137,6 @@ DEF_HELPER_2(rsqrte_f32, f32, f32, env) > DEF_HELPER_2(recpe_u32, i32, i32, env) > DEF_HELPER_2(rsqrte_u32, i32, i32, env) > DEF_HELPER_4(neon_tbl, i32, i32, i32, i32, i32) > -DEF_HELPER_2(neon_add_saturate_u64, i64, i64, i64) > -DEF_HELPER_2(neon_add_saturate_s64, i64, i64, i64) > -DEF_HELPER_2(neon_sub_saturate_u64, i64, i64, i64) > -DEF_HELPER_2(neon_sub_saturate_s64, i64, i64, i64) > > DEF_HELPER_2(add_cc, i32, i32, i32) > DEF_HELPER_2(adc_cc, i32, i32, i32) > @@ -160,10 +156,18 @@ DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32) > DEF_HELPER_3(neon_qadd_s8, i32, env, i32, i32) > DEF_HELPER_3(neon_qadd_u16, i32, env, i32, i32) > DEF_HELPER_3(neon_qadd_s16, i32, env, i32, i32) > +DEF_HELPER_3(neon_qadd_u32, i32, env, i32, i32) > +DEF_HELPER_3(neon_qadd_s32, i32, env, i32, i32) > DEF_HELPER_3(neon_qsub_u8, i32, env, i32, i32) > DEF_HELPER_3(neon_qsub_s8, i32, env, i32, i32) > DEF_HELPER_3(neon_qsub_u16, i32, env, i32, i32) > DEF_HELPER_3(neon_qsub_s16, i32, env, i32, i32) > +DEF_HELPER_3(neon_qsub_u32, i32, env, i32, i32) > +DEF_HELPER_3(neon_qsub_s32, i32, env, i32, i32) > +DEF_HELPER_3(neon_qadd_u64, i64, env, i64, i64) > +DEF_HELPER_3(neon_qadd_s64, i64, env, i64, i64) > +DEF_HELPER_3(neon_qsub_u64, i64, env, i64, i64) > +DEF_HELPER_3(neon_qsub_s64, i64, env, i64, i64) > > DEF_HELPER_2(neon_hadd_s8, i32, i32, i32) > DEF_HELPER_2(neon_hadd_u8, i32, i32, i32) > diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c > index 2f96575..9641ccc 100644 > --- a/target-arm/neon_helper.c > +++ b/target-arm/neon_helper.c > @@ -198,6 +198,28 @@ NEON_VOP_ENV(qadd_u16, neon_u16, 2) > #undef NEON_FN > #undef NEON_USAT > > +uint32_t HELPER(neon_qadd_u32)(CPUState *env, uint32_t a, uint32_t b) > +{ > + uint32_t res = a + b; > + if (res < a) { > + SET_QC(); > + res = ~0; > + } > + return res; > +} > + > +uint64_t HELPER(neon_qadd_u64)(CPUState *env, uint64_t src1, uint64_t src2) > +{ > + uint64_t res; > + > + res = src1 + src2; > + if (res < src1) { > + SET_QC(); > + res = ~(uint64_t)0; > + } > + return res; > +} > + > #define NEON_SSAT(dest, src1, src2, type) do { \ > int32_t tmp = (uint32_t)src1 + (uint32_t)src2; \ > if (tmp != (type)tmp) { \ > @@ -218,6 +240,28 @@ NEON_VOP_ENV(qadd_s16, neon_s16, 2) > #undef NEON_FN > #undef NEON_SSAT > > +uint32_t HELPER(neon_qadd_s32)(CPUState *env, uint32_t a, uint32_t b) > +{ > + uint32_t res = a + b; > + if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) { > + SET_QC(); > + res = ~(((int32_t)a >> 31) ^ SIGNBIT); > + } > + return res; > +} > + > +uint64_t HELPER(neon_qadd_s64)(CPUState *env, uint64_t src1, uint64_t src2) > +{ > + uint64_t res; > + > + res = src1 + src2; > + if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) { > + SET_QC(); > + res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; > + } > + return res; > +} > + > #define NEON_USAT(dest, src1, src2, type) do { \ > uint32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ > if (tmp != (type)tmp) { \ > @@ -234,6 +278,29 @@ NEON_VOP_ENV(qsub_u16, neon_u16, 2) > #undef NEON_FN > #undef NEON_USAT > > +uint32_t HELPER(neon_qsub_u32)(CPUState *env, uint32_t a, uint32_t b) > +{ > + uint32_t res = a - b; > + if (res > a) { > + SET_QC(); > + res = 0; > + } > + return res; > +} > + > +uint64_t HELPER(neon_qsub_u64)(CPUState *env, uint64_t src1, uint64_t src2) > +{ > + uint64_t res; > + > + if (src1 < src2) { > + SET_QC(); > + res = 0; > + } else { > + res = src1 - src2; > + } > + return res; > +} > + > #define NEON_SSAT(dest, src1, src2, type) do { \ > int32_t tmp = (uint32_t)src1 - (uint32_t)src2; \ > if (tmp != (type)tmp) { \ > @@ -254,6 +321,28 @@ NEON_VOP_ENV(qsub_s16, neon_s16, 2) > #undef NEON_FN > #undef NEON_SSAT > > +uint32_t HELPER(neon_qsub_s32)(CPUState *env, uint32_t a, uint32_t b) > +{ > + uint32_t res = a - b; > + if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) { > + SET_QC(); > + res = ~(((int32_t)a >> 31) ^ SIGNBIT); > + } > + return res; > +} > + > +uint64_t HELPER(neon_qsub_s64)(CPUState *env, uint64_t src1, uint64_t src2) > +{ > + uint64_t res; > + > + res = src1 - src2; > + if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) { > + SET_QC(); > + res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; > + } > + return res; > +} > + > #define NEON_FN(dest, src1, src2) dest = (src1 + src2) >> 1 > NEON_VOP(hadd_s8, neon_s8, 4) > NEON_VOP(hadd_u8, neon_u8, 4) > diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c > index 43baa63..3de2610 100644 > --- a/target-arm/op_helper.c > +++ b/target-arm/op_helper.c > @@ -424,52 +424,3 @@ uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i) > return ((uint32_t)x >> shift) | (x << (32 - shift)); > } > } > - > -uint64_t HELPER(neon_add_saturate_s64)(uint64_t src1, uint64_t src2) > -{ > - uint64_t res; > - > - res = src1 + src2; > - if (((res ^ src1) & SIGNBIT64) && !((src1 ^ src2) & SIGNBIT64)) { > - env->QF = 1; > - res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; > - } > - return res; > -} > - > -uint64_t HELPER(neon_add_saturate_u64)(uint64_t src1, uint64_t src2) > -{ > - uint64_t res; > - > - res = src1 + src2; > - if (res < src1) { > - env->QF = 1; > - res = ~(uint64_t)0; > - } > - return res; > -} > - > -uint64_t HELPER(neon_sub_saturate_s64)(uint64_t src1, uint64_t src2) > -{ > - uint64_t res; > - > - res = src1 - src2; > - if (((res ^ src1) & SIGNBIT64) && ((src1 ^ src2) & SIGNBIT64)) { > - env->QF = 1; > - res = ((int64_t)src1 >> 63) ^ ~SIGNBIT64; > - } > - return res; > -} > - > -uint64_t HELPER(neon_sub_saturate_u64)(uint64_t src1, uint64_t src2) > -{ > - uint64_t res; > - > - if (src1 < src2) { > - env->QF = 1; > - res = 0; > - } else { > - res = src1 - src2; > - } > - return res; > -} > diff --git a/target-arm/translate.c b/target-arm/translate.c > index 9150242..f8606bd 100644 > --- a/target-arm/translate.c > +++ b/target-arm/translate.c > @@ -3539,12 +3539,6 @@ static inline void gen_neon_rsb(int size, TCGv t0, TCGv t1) > #define gen_helper_neon_pmin_s32 gen_helper_neon_min_s32 > #define gen_helper_neon_pmin_u32 gen_helper_neon_min_u32 > > -/* FIXME: This is wrong. They set the wrong overflow bit. */ > -#define gen_helper_neon_qadd_s32(a, e, b, c) gen_helper_add_saturate(a, b, c) > -#define gen_helper_neon_qadd_u32(a, e, b, c) gen_helper_add_usaturate(a, b, c) > -#define gen_helper_neon_qsub_s32(a, e, b, c) gen_helper_sub_saturate(a, b, c) > -#define gen_helper_neon_qsub_u32(a, e, b, c) gen_helper_sub_usaturate(a, b, c) > - > #define GEN_NEON_INTEGER_OP_ENV(name) do { \ > switch ((size << 1) | u) { \ > case 0: \ > @@ -4243,16 +4237,20 @@ static int disas_neon_data_insn(CPUState * env, DisasContext *s, uint32_t insn) > switch (op) { > case 1: /* VQADD */ > if (u) { > - gen_helper_neon_add_saturate_u64(CPU_V001); > + gen_helper_neon_qadd_u64(cpu_V0, cpu_env, > + cpu_V0, cpu_V1); > } else { > - gen_helper_neon_add_saturate_s64(CPU_V001); > + gen_helper_neon_qadd_s64(cpu_V0, cpu_env, > + cpu_V0, cpu_V1); > } > break; > case 5: /* VQSUB */ > if (u) { > - gen_helper_neon_sub_saturate_u64(CPU_V001); > + gen_helper_neon_qsub_u64(cpu_V0, cpu_env, > + cpu_V0, cpu_V1); > } else { > - gen_helper_neon_sub_saturate_s64(CPU_V001); > + gen_helper_neon_qsub_s64(cpu_V0, cpu_env, > + cpu_V0, cpu_V1); > } > break; > case 8: /* VSHL */ > -- > 1.7.2.3 > > > -- Aurelien Jarno GPG: 1024D/F1BCDB73 aurelien@aurel32.net http://www.aurel32.net