All of lore.kernel.org
 help / color / mirror / Atom feed
From: Aurelien Jarno <aurelien@aurel32.net>
To: Richard Henderson <rth@twiddle.net>
Cc: qemu-devel@nongnu.org, proljc@gmail.com
Subject: Re: [Qemu-devel] [PATCH v2] target-mips: Implement Loongson Multimedia Instructions
Date: Sat, 8 Sep 2012 02:32:16 +0200	[thread overview]
Message-ID: <20120908003216.GL6791@ohm.aurel32.net> (raw)
In-Reply-To: <1333127593-7841-1-git-send-email-rth@twiddle.net>

On Fri, Mar 30, 2012 at 01:13:13PM -0400, Richard Henderson wrote:
> Implements all of the COP2 instructions except for the S<cond>
> family of comparisons.  The documentation is unclear for those.
> 
> Signed-off-by: Richard Henderson <rth@twiddle.net>
> ---
> 
> v2: Rebased vs master.
> 
>  Makefile.target          |    3 +
>  target-mips/helper.h     |   59 ++++
>  target-mips/lmi_helper.c |  744 ++++++++++++++++++++++++++++++++++++++++++++++
>  target-mips/translate.c  |  372 +++++++++++++++++++++++-
>  4 files changed, 1176 insertions(+), 2 deletions(-)
>  create mode 100644 target-mips/lmi_helper.c
> 
> diff --git a/Makefile.target b/Makefile.target
> index 44b2e83..35ca860 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -99,6 +99,9 @@ endif
>  libobj-$(TARGET_SPARC) += int32_helper.o
>  libobj-$(TARGET_SPARC64) += int64_helper.o
>  libobj-$(TARGET_ALPHA) += int_helper.o fpu_helper.o sys_helper.o mem_helper.o
> +ifeq ($(TARGET_BASE_ARCH), mips)
> +libobj-y += lmi_helper.o
> +endif
>  
>  libobj-y += disas.o
>  libobj-$(CONFIG_TCI_DIS) += tci-dis.o
> diff --git a/target-mips/helper.h b/target-mips/helper.h
> index 76fb451..15db21e 100644
> --- a/target-mips/helper.h
> +++ b/target-mips/helper.h
> @@ -297,4 +297,63 @@ DEF_HELPER_0(rdhwr_ccres, tl)
>  DEF_HELPER_1(pmon, void, int)
>  DEF_HELPER_0(wait, void)
>  
> +/* Loongson multimedia functions.  */
> +DEF_HELPER_FLAGS_2(paddsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(paddush, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(paddh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(paddw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(paddsb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(paddusb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(paddb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +
> +DEF_HELPER_FLAGS_2(psubsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(psubush, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(psubh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(psubw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(psubsb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(psubusb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(psubb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +
> +DEF_HELPER_FLAGS_2(pshufh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(packsswh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(packsshb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(packushb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +
> +DEF_HELPER_FLAGS_2(punpcklhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(punpckhhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(punpcklbh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(punpckhbh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(punpcklwd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(punpckhwd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +
> +DEF_HELPER_FLAGS_2(pavgh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pavgb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pmaxsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pminsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pmaxub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pminub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +
> +DEF_HELPER_FLAGS_2(pcmpeqw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pcmpgtw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pcmpeqh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pcmpgth, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pcmpeqb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pcmpgtb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +
> +DEF_HELPER_FLAGS_2(psllw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(psllh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(psrlw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(psrlh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(psraw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(psrah, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +
> +DEF_HELPER_FLAGS_2(pmullh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pmulhh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pmulhuh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_2(pmaddhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +
> +DEF_HELPER_FLAGS_2(pasubub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
> +DEF_HELPER_FLAGS_1(biadd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
> +DEF_HELPER_FLAGS_1(pmovmskb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
> +
>  #include "def-helper.h"
> diff --git a/target-mips/lmi_helper.c b/target-mips/lmi_helper.c
> new file mode 100644
> index 0000000..1b24353
> --- /dev/null
> +++ b/target-mips/lmi_helper.c
> @@ -0,0 +1,744 @@
> +/*
> + *  Loongson Multimedia Instruction emulation helpers for QEMU.
> + *
> + *  Copyright (c) 2011  Richard Henderson <rth@twiddle.net>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "cpu.h"
> +#include "helper.h"
> +
> +/* If the byte ordering doesn't matter, i.e. all columns are treated
> +   identically, then this union can be used directly.  If byte ordering
> +   does matter, we generally ignore dumping to memory.  */
> +typedef union {
> +    uint8_t  ub[8];
> +    int8_t   sb[8];
> +    uint16_t uh[4];
> +    int16_t  sh[4];
> +    uint32_t uw[2];
> +    int32_t  sw[2];
> +    uint64_t d;
> +} LMIValue;
> +
> +/* Some byte ordering issues can be mitigated by XORing in the following.  */
> +#ifdef HOST_WORDS_BIGENDIAN
> +# define BYTE_ORDER_XOR(N) N
> +#else
> +# define BYTE_ORDER_XOR(N) 0
> +#endif
> +
> +#define SATSB(x)  (x < -0x80 ? -0x80 : x > 0x7f ? 0x7f : x)
> +#define SATUB(x)  (x > 0xff ? 0xff : x)
> +
> +#define SATSH(x)  (x < -0x8000 ? -0x8000 : x > 0x7fff ? 0x7fff : x)
> +#define SATUH(x)  (x > 0xffff ? 0xffff : x)
> +
> +#define SATSW(x) \
> +    (x < -0x80000000ll ? -0x80000000ll : x > 0x7fffffff ? 0x7fffffff : x)
> +#define SATUW(x)  (x > 0xffffffffull ? 0xffffffffull : x)
> +
> +uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 8; ++i) {
> +        int r = vs.sb[i] + vt.sb[i];
> +        vs.sb[i] = SATSB(r);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 8; ++i) {
> +        int r = vs.ub[i] + vt.ub[i];
> +        vs.ub[i] = SATUB(r);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; ++i) {
> +        int r = vs.sh[i] + vt.sh[i];
> +        vs.sh[i] = SATSH(r);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_paddush(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; ++i) {
> +        int r = vs.uh[i] + vt.uh[i];
> +        vs.uh[i] = SATUH(r);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_paddb(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 8; ++i) {
> +        vs.ub[i] += vt.ub[i];
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_paddh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; ++i) {
> +        vs.uh[i] += vt.uh[i];
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_paddw(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 2; ++i) {
> +        vs.uw[i] += vt.uw[i];
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 8; ++i) {
> +        int r = vs.sb[i] - vt.sb[i];
> +        vs.sb[i] = SATSB(r);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 8; ++i) {
> +        int r = vs.ub[i] - vt.ub[i];
> +        vs.ub[i] = SATUB(r);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; ++i) {
> +        int r = vs.sh[i] - vt.sh[i];
> +        vs.sh[i] = SATSH(r);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psubush(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; ++i) {
> +        int r = vs.uh[i] - vt.uh[i];
> +        vs.uh[i] = SATUH(r);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psubb(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 8; ++i) {
> +        vs.ub[i] -= vt.ub[i];
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psubh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; ++i) {
> +        vs.uh[i] -= vt.uh[i];
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psubw(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned int i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 2; ++i) {
> +        vs.uw[i] -= vt.uw[i];
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
> +{
> +    unsigned host = BYTE_ORDER_XOR(3);
> +    LMIValue vd, vs;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vd.d = 0;
> +    for (i = 0; i < 4; i++, ft >>= 2) {
> +        vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host];
> +    }
> +    return vd.d;
> +}
> +
> +uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
> +{
> +    uint64_t fd = 0;
> +    int64_t tmp;
> +
> +    tmp = (int32_t)(fs >> 0);
> +    tmp = SATSH(tmp);
> +    fd |= (tmp & 0xffff) << 0;
> +
> +    tmp = (int32_t)(fs >> 32);
> +    tmp = SATSH(tmp);
> +    fd |= (tmp & 0xffff) << 16;
> +
> +    tmp = (int32_t)(ft >> 0);
> +    tmp = SATSH(tmp);
> +    fd |= (tmp & 0xffff) << 32;
> +
> +    tmp = (int32_t)(ft >> 32);
> +    tmp = SATSH(tmp);
> +    fd |= (tmp & 0xffff) << 48;
> +
> +    return fd;
> +}
> +
> +uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
> +{
> +    uint64_t fd = 0;
> +    unsigned int i;
> +
> +    for (i = 0; i < 4; ++i) {
> +        int16_t tmp = fs >> (i * 16);
> +        tmp = SATSB(tmp);
> +        fd |= (uint64_t)(tmp & 0xff) << (i * 8);
> +    }
> +    for (i = 0; i < 4; ++i) {
> +        int16_t tmp = ft >> (i * 16);
> +        tmp = SATSB(tmp);
> +        fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32);
> +    }
> +
> +    return fd;
> +}
> +
> +uint64_t helper_packushb(uint64_t fs, uint64_t ft)
> +{
> +    uint64_t fd = 0;
> +    unsigned int i;
> +
> +    for (i = 0; i < 4; ++i) {
> +        int16_t tmp = fs >> (i * 16);
> +        tmp = SATUB(tmp);
> +        fd |= (uint64_t)(tmp & 0xff) << (i * 8);
> +    }
> +    for (i = 0; i < 4; ++i) {
> +        int16_t tmp = ft >> (i * 16);
> +        tmp = SATUB(tmp);
> +        fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32);
> +    }
> +
> +    return fd;
> +}
> +
> +uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
> +{
> +    return (fs & 0xffffffff) | (ft << 32);
> +}
> +
> +uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
> +{
> +    return (fs >> 32) | (ft & ~0xffffffffull);
> +}
> +
> +uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
> +{
> +    unsigned host = BYTE_ORDER_XOR(3);
> +    LMIValue vd, vs, vt;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    vd.uh[0 ^ host] = vs.uh[0 ^ host];
> +    vd.uh[1 ^ host] = vt.uh[0 ^ host];
> +    vd.uh[2 ^ host] = vs.uh[1 ^ host];
> +    vd.uh[3 ^ host] = vt.uh[1 ^ host];
> +
> +    return vd.d;
> +}
> +
> +uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
> +{
> +    unsigned host = BYTE_ORDER_XOR(3);
> +    LMIValue vd, vs, vt;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    vd.uh[0 ^ host] = vs.uh[2 ^ host];
> +    vd.uh[1 ^ host] = vt.uh[2 ^ host];
> +    vd.uh[2 ^ host] = vs.uh[3 ^ host];
> +    vd.uh[3 ^ host] = vt.uh[3 ^ host];
> +
> +    return vd.d;
> +}
> +
> +uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
> +{
> +    unsigned host = BYTE_ORDER_XOR(7);
> +    LMIValue vd, vs, vt;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    vd.ub[0 ^ host] = vs.ub[0 ^ host];
> +    vd.ub[1 ^ host] = vt.ub[0 ^ host];
> +    vd.ub[2 ^ host] = vs.ub[1 ^ host];
> +    vd.ub[3 ^ host] = vt.ub[1 ^ host];
> +    vd.ub[4 ^ host] = vs.ub[2 ^ host];
> +    vd.ub[5 ^ host] = vt.ub[2 ^ host];
> +    vd.ub[6 ^ host] = vs.ub[3 ^ host];
> +    vd.ub[7 ^ host] = vt.ub[3 ^ host];
> +
> +    return vd.d;
> +}
> +
> +uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
> +{
> +    unsigned host = BYTE_ORDER_XOR(7);
> +    LMIValue vd, vs, vt;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    vd.ub[0 ^ host] = vs.ub[4 ^ host];
> +    vd.ub[1 ^ host] = vt.ub[4 ^ host];
> +    vd.ub[2 ^ host] = vs.ub[5 ^ host];
> +    vd.ub[3 ^ host] = vt.ub[5 ^ host];
> +    vd.ub[4 ^ host] = vs.ub[6 ^ host];
> +    vd.ub[5 ^ host] = vt.ub[6 ^ host];
> +    vd.ub[6 ^ host] = vs.ub[7 ^ host];
> +    vd.ub[7 ^ host] = vt.ub[7 ^ host];
> +
> +    return vd.d;
> +}
> +
> +uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; i++) {
> +        vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1;
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 8; i++) {
> +        vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1;
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; i++) {
> +        vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; i++) {
> +        vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; i++) {
> +        vs.ub[i] = (vs.ub[i] >= vt.ub[i] ? vs.ub[i] : vt.ub[i]);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pminub(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; i++) {
> +        vs.ub[i] = (vs.ub[i] <= vt.ub[i] ? vs.ub[i] : vt.ub[i]);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 2; i++) {
> +        vs.uw[i] = -(vs.uw[i] == vt.uw[i]);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 2; i++) {
> +        vs.uw[i] = -(vs.uw[i] > vt.uw[i]);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; i++) {
> +        vs.uh[i] = -(vs.uh[i] == vt.uh[i]);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; i++) {
> +        vs.uh[i] = -(vs.uh[i] > vt.uh[i]);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 8; i++) {
> +        vs.ub[i] = -(vs.ub[i] == vt.ub[i]);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 8; i++) {
> +        vs.ub[i] = -(vs.ub[i] > vt.ub[i]);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psllw(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs;
> +    unsigned i;
> +
> +    ft &= 0x7f;
> +    if (ft > 31) {
> +        return 0;
> +    }
> +    vs.d = fs;
> +    for (i = 0; i < 2; ++i) {
> +        vs.uw[i] <<= ft;
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs;
> +    unsigned i;
> +
> +    ft &= 0x7f;
> +    if (ft > 31) {
> +        return 0;
> +    }
> +    vs.d = fs;
> +    for (i = 0; i < 2; ++i) {
> +        vs.uw[i] >>= ft;
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psraw(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs;
> +    unsigned i;
> +
> +    ft &= 0x7f;
> +    if (ft > 31) {
> +        ft = 31;
> +    }
> +    vs.d = fs;
> +    for (i = 0; i < 2; ++i) {
> +        vs.sw[i] >>= ft;
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psllh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs;
> +    unsigned i;
> +
> +    ft &= 0x7f;
> +    if (ft > 15) {
> +        return 0;
> +    }
> +    vs.d = fs;
> +    for (i = 0; i < 4; ++i) {
> +        vs.uh[i] <<= ft;
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs;
> +    unsigned i;
> +
> +    ft &= 0x7f;
> +    if (ft > 15) {
> +        return 0;
> +    }
> +    vs.d = fs;
> +    for (i = 0; i < 4; ++i) {
> +        vs.uh[i] >>= ft;
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_psrah(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs;
> +    unsigned i;
> +
> +    ft &= 0x7f;
> +    if (ft > 15) {
> +        ft = 15;
> +    }
> +    vs.d = fs;
> +    for (i = 0; i < 4; ++i) {
> +        vs.sh[i] >>= ft;
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; ++i) {
> +        vs.sh[i] *= vt.sh[i];
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; ++i) {
> +        int32_t r = vs.sh[i] * vt.sh[i];
> +        vs.sh[i] = r >> 16;
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 4; ++i) {
> +        uint32_t r = vs.uh[i] * vt.uh[i];
> +        vs.uh[i] = r >> 16;
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
> +{
> +    unsigned host = BYTE_ORDER_XOR(3);
> +    LMIValue vs, vt;
> +    uint32_t p0, p1;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    p0  = vs.sh[0 ^ host] * vt.sh[0 ^ host];
> +    p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host];
> +    p1  = vs.sh[2 ^ host] * vt.sh[2 ^ host];
> +    p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host];
> +
> +    return ((uint64_t)p1 << 32) | p0;
> +}
> +
> +uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
> +{
> +    LMIValue vs, vt;
> +    unsigned i;
> +
> +    vs.d = fs;
> +    vt.d = ft;
> +    for (i = 0; i < 8; ++i) {
> +        int r = vs.ub[i] - vt.ub[i];
> +        vs.ub[i] = (r < 0 ? -r : r);
> +    }
> +    return vs.d;
> +}
> +
> +uint64_t helper_biadd(uint64_t fs)
> +{
> +    unsigned i, fd;
> +
> +    for (i = fd = 0; i < 8; ++i) {
> +        fd += (fs >> (i * 8)) & 0xff;
> +    }
> +    return fd & 0xffff;
> +}
> +
> +uint64_t helper_pmovmskb(uint64_t fs)
> +{
> +    unsigned fd = 0;
> +
> +    fd |= ((fs >>  7) & 1) << 0;
> +    fd |= ((fs >> 15) & 1) << 1;
> +    fd |= ((fs >> 23) & 1) << 2;
> +    fd |= ((fs >> 31) & 1) << 3;
> +    fd |= ((fs >> 39) & 1) << 4;
> +    fd |= ((fs >> 47) & 1) << 5;
> +    fd |= ((fs >> 55) & 1) << 6;
> +    fd |= ((fs >> 63) & 1) << 7;
> +
> +    return fd & 0xff;
> +}
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index a663b74..1a8b7a5 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -446,6 +446,103 @@ enum {
>      OPC_BC2     = (0x08 << 21) | OPC_CP2,
>  };
>  
> +#define MASK_LMI(op)  MASK_OP_MAJOR(op) | (op & (0x1F << 21)) | (op & 0x1F)
> +
> +enum {
> +    OPC_PADDSH  = (24 << 21) | (0x00) | OPC_CP2,
> +    OPC_PADDUSH = (25 << 21) | (0x00) | OPC_CP2,
> +    OPC_PADDH   = (26 << 21) | (0x00) | OPC_CP2,
> +    OPC_PADDW   = (27 << 21) | (0x00) | OPC_CP2,
> +    OPC_PADDSB  = (28 << 21) | (0x00) | OPC_CP2,
> +    OPC_PADDUSB = (29 << 21) | (0x00) | OPC_CP2,
> +    OPC_PADDB   = (30 << 21) | (0x00) | OPC_CP2,
> +    OPC_PADDD   = (31 << 21) | (0x00) | OPC_CP2,
> +
> +    OPC_PSUBSH  = (24 << 21) | (0x01) | OPC_CP2,
> +    OPC_PSUBUSH = (25 << 21) | (0x01) | OPC_CP2,
> +    OPC_PSUBH   = (26 << 21) | (0x01) | OPC_CP2,
> +    OPC_PSUBW   = (27 << 21) | (0x01) | OPC_CP2,
> +    OPC_PSUBSB  = (28 << 21) | (0x01) | OPC_CP2,
> +    OPC_PSUBUSB = (29 << 21) | (0x01) | OPC_CP2,
> +    OPC_PSUBB   = (30 << 21) | (0x01) | OPC_CP2,
> +    OPC_PSUBD   = (31 << 21) | (0x01) | OPC_CP2,
> +
> +    OPC_PSHUFH   = (24 << 21) | (0x02) | OPC_CP2,
> +    OPC_PACKSSWH = (25 << 21) | (0x02) | OPC_CP2,
> +    OPC_PACKSSHB = (26 << 21) | (0x02) | OPC_CP2,
> +    OPC_PACKUSHB = (27 << 21) | (0x02) | OPC_CP2,
> +    OPC_XOR_CP2  = (28 << 21) | (0x02) | OPC_CP2,
> +    OPC_NOR_CP2  = (29 << 21) | (0x02) | OPC_CP2,
> +    OPC_AND_CP2  = (30 << 21) | (0x02) | OPC_CP2,
> +    OPC_PANDN    = (31 << 21) | (0x02) | OPC_CP2,
> +
> +    OPC_PUNPCKLHW = (24 << 21) | (0x03) | OPC_CP2,
> +    OPC_PUNPCKHHW = (25 << 21) | (0x03) | OPC_CP2,
> +    OPC_PUNPCKLBH = (26 << 21) | (0x03) | OPC_CP2,
> +    OPC_PUNPCKHBH = (27 << 21) | (0x03) | OPC_CP2,
> +    OPC_PINSRH_0  = (28 << 21) | (0x03) | OPC_CP2,
> +    OPC_PINSRH_1  = (29 << 21) | (0x03) | OPC_CP2,
> +    OPC_PINSRH_2  = (30 << 21) | (0x03) | OPC_CP2,
> +    OPC_PINSRH_3  = (31 << 21) | (0x03) | OPC_CP2,
> +
> +    OPC_PAVGH   = (24 << 21) | (0x08) | OPC_CP2,
> +    OPC_PAVGB   = (25 << 21) | (0x08) | OPC_CP2,
> +    OPC_PMAXSH  = (26 << 21) | (0x08) | OPC_CP2,
> +    OPC_PMINSH  = (27 << 21) | (0x08) | OPC_CP2,
> +    OPC_PMAXUB  = (28 << 21) | (0x08) | OPC_CP2,
> +    OPC_PMINUB  = (29 << 21) | (0x08) | OPC_CP2,
> +
> +    OPC_PCMPEQW = (24 << 21) | (0x09) | OPC_CP2,
> +    OPC_PCMPGTW = (25 << 21) | (0x09) | OPC_CP2,
> +    OPC_PCMPEQH = (26 << 21) | (0x09) | OPC_CP2,
> +    OPC_PCMPGTH = (27 << 21) | (0x09) | OPC_CP2,
> +    OPC_PCMPEQB = (28 << 21) | (0x09) | OPC_CP2,
> +    OPC_PCMPGTB = (29 << 21) | (0x09) | OPC_CP2,
> +
> +    OPC_PSLLW   = (24 << 21) | (0x0A) | OPC_CP2,
> +    OPC_PSLLH   = (25 << 21) | (0x0A) | OPC_CP2,
> +    OPC_PMULLH  = (26 << 21) | (0x0A) | OPC_CP2,
> +    OPC_PMULHH  = (27 << 21) | (0x0A) | OPC_CP2,
> +    OPC_PMULUW  = (28 << 21) | (0x0A) | OPC_CP2,
> +    OPC_PMULHUH = (29 << 21) | (0x0A) | OPC_CP2,
> +
> +    OPC_PSRLW     = (24 << 21) | (0x0B) | OPC_CP2,
> +    OPC_PSRLH     = (25 << 21) | (0x0B) | OPC_CP2,
> +    OPC_PSRAW     = (26 << 21) | (0x0B) | OPC_CP2,
> +    OPC_PSRAH     = (27 << 21) | (0x0B) | OPC_CP2,
> +    OPC_PUNPCKLWD = (28 << 21) | (0x0B) | OPC_CP2,
> +    OPC_PUNPCKHWD = (29 << 21) | (0x0B) | OPC_CP2,
> +
> +    OPC_ADDU_CP2 = (24 << 21) | (0x0C) | OPC_CP2,
> +    OPC_OR_CP2   = (25 << 21) | (0x0C) | OPC_CP2,
> +    OPC_ADD_CP2  = (26 << 21) | (0x0C) | OPC_CP2,
> +    OPC_DADD_CP2 = (27 << 21) | (0x0C) | OPC_CP2,
> +    OPC_SEQU_CP2 = (28 << 21) | (0x0C) | OPC_CP2,
> +    OPC_SEQ_CP2  = (29 << 21) | (0x0C) | OPC_CP2,
> +
> +    OPC_SUBU_CP2 = (24 << 21) | (0x0D) | OPC_CP2,
> +    OPC_PASUBUB  = (25 << 21) | (0x0D) | OPC_CP2,
> +    OPC_SUB_CP2  = (26 << 21) | (0x0D) | OPC_CP2,
> +    OPC_DSUB_CP2 = (27 << 21) | (0x0D) | OPC_CP2,
> +    OPC_SLTU_CP2 = (28 << 21) | (0x0D) | OPC_CP2,
> +    OPC_SLT_CP2  = (29 << 21) | (0x0D) | OPC_CP2,
> +
> +    OPC_SLL_CP2  = (24 << 21) | (0x0E) | OPC_CP2,
> +    OPC_DSLL_CP2 = (25 << 21) | (0x0E) | OPC_CP2,
> +    OPC_PEXTRH   = (26 << 21) | (0x0E) | OPC_CP2,
> +    OPC_PMADDHW  = (27 << 21) | (0x0E) | OPC_CP2,
> +    OPC_SLEU_CP2 = (28 << 21) | (0x0E) | OPC_CP2,
> +    OPC_SLE_CP2  = (29 << 21) | (0x0E) | OPC_CP2,
> +
> +    OPC_SRL_CP2  = (24 << 21) | (0x0F) | OPC_CP2,
> +    OPC_DSRL_CP2 = (25 << 21) | (0x0F) | OPC_CP2,
> +    OPC_SRA_CP2  = (26 << 21) | (0x0F) | OPC_CP2,
> +    OPC_DSRA_CP2 = (27 << 21) | (0x0F) | OPC_CP2,
> +    OPC_BIADD    = (28 << 21) | (0x0F) | OPC_CP2,
> +    OPC_PMOVMSKB = (29 << 21) | (0x0F) | OPC_CP2,
> +};
> +
> +
>  #define MASK_CP3(op)       MASK_OP_MAJOR(op) | (op & 0x3F)
>  
>  enum {
> @@ -2571,7 +2668,274 @@ static void gen_loongson_integer (DisasContext *ctx, uint32_t opc,
>      }
>  
>      (void)opn; /* avoid a compiler warning */
> -    MIPS_DEBUG("%s %s, %s", opn, regnames[rd], regnames[rs]);
> +    MIPS_DEBUG("%s %s, %s, %s", opn, regnames[rd], regnames[rs], regnames[rt]);
> +    tcg_temp_free(t0);
> +    tcg_temp_free(t1);
> +}
> +
> +/* Loongson multimedia instructions */
> +static void gen_loongson_multimedia (DisasContext *ctx, int rd, int rs, int rt)
> +{
> +    const char *opn = "loongson_cp2";
> +    uint32_t opc, shift_max;
> +    TCGv_i64 t0, t1;
> +
> +    opc = MASK_LMI (ctx->opcode);
> +    switch (opc) {
> +    case OPC_ADD_CP2:
> +    case OPC_SUB_CP2:
> +    case OPC_DADD_CP2:
> +    case OPC_DSUB_CP2:
> +        t0 = tcg_temp_local_new_i64();
> +        t1 = tcg_temp_local_new_i64();
> +        break;
> +    default:
> +        t0 = tcg_temp_new_i64();
> +        t1 = tcg_temp_new_i64();
> +        break;
> +    }
> +
> +    gen_load_fpr64(ctx, t0, rs);
> +    gen_load_fpr64(ctx, t1, rt);
> +
> +#define LMI_HELPER(UP,LO) \
> +    case OPC_##UP: gen_helper_##LO(t0, t0, t1); opn = #LO; break
> +#define LMI_HELPER_1(UP,LO) \
> +    case OPC_##UP: gen_helper_##LO(t0, t0); opn = #LO; break
> +#define LMI_DIRECT(UP,LO,OP) \
> +    case OPC_##UP: tcg_gen_##OP##_i64(t0, t0, t1); opn = #LO; break
> +
> +    switch (opc) {
> +    LMI_HELPER(PADDSH, paddsh);
> +    LMI_HELPER(PADDUSH, paddush);
> +    LMI_HELPER(PADDH, paddh);
> +    LMI_HELPER(PADDW, paddw);
> +    LMI_HELPER(PADDSB, paddsb);
> +    LMI_HELPER(PADDUSB, paddusb);
> +    LMI_HELPER(PADDB, paddb);
> +
> +    LMI_HELPER(PSUBSH, psubsh);
> +    LMI_HELPER(PSUBUSH, psubush);
> +    LMI_HELPER(PSUBH, psubh);
> +    LMI_HELPER(PSUBW, psubw);
> +    LMI_HELPER(PSUBSB, psubsb);
> +    LMI_HELPER(PSUBUSB, psubusb);
> +    LMI_HELPER(PSUBB, psubb);
> +
> +    LMI_HELPER(PSHUFH, pshufh);
> +    LMI_HELPER(PACKSSWH, packsswh);
> +    LMI_HELPER(PACKSSHB, packsshb);
> +    LMI_HELPER(PACKUSHB, packushb);
> +
> +    LMI_HELPER(PUNPCKLHW, punpcklhw);
> +    LMI_HELPER(PUNPCKHHW, punpckhhw);
> +    LMI_HELPER(PUNPCKLBH, punpcklbh);
> +    LMI_HELPER(PUNPCKHBH, punpckhbh);
> +    LMI_HELPER(PUNPCKLWD, punpcklwd);
> +    LMI_HELPER(PUNPCKHWD, punpckhwd);
> +
> +    LMI_HELPER(PAVGH, pavgh);
> +    LMI_HELPER(PAVGB, pavgb);
> +    LMI_HELPER(PMAXSH, pmaxsh);
> +    LMI_HELPER(PMINSH, pminsh);
> +    LMI_HELPER(PMAXUB, pmaxub);
> +    LMI_HELPER(PMINUB, pminub);
> +
> +    LMI_HELPER(PCMPEQW, pcmpeqw);
> +    LMI_HELPER(PCMPGTW, pcmpgtw);
> +    LMI_HELPER(PCMPEQH, pcmpeqh);
> +    LMI_HELPER(PCMPGTH, pcmpgth);
> +    LMI_HELPER(PCMPEQB, pcmpeqb);
> +    LMI_HELPER(PCMPGTB, pcmpgtb);
> +
> +    LMI_HELPER(PSLLW, psllw);
> +    LMI_HELPER(PSLLH, psllh);
> +    LMI_HELPER(PSRLW, psrlw);
> +    LMI_HELPER(PSRLH, psrlh);
> +    LMI_HELPER(PSRAW, psraw);
> +    LMI_HELPER(PSRAH, psrah);
> +
> +    LMI_HELPER(PMULLH, pmullh);
> +    LMI_HELPER(PMULHH, pmulhh);
> +    LMI_HELPER(PMULHUH, pmulhuh);
> +    LMI_HELPER(PMADDHW, pmaddhw);
> +
> +    LMI_HELPER(PASUBUB, pasubub);
> +    LMI_HELPER_1(BIADD, biadd);
> +    LMI_HELPER_1(PMOVMSKB, pmovmskb);
> +
> +    LMI_DIRECT(PADDD, paddd, add);
> +    LMI_DIRECT(PSUBD, psubd, sub);
> +    LMI_DIRECT(XOR_CP2, xor, xor);
> +    LMI_DIRECT(NOR_CP2, nor, nor);
> +    LMI_DIRECT(AND_CP2, and, and);
> +    LMI_DIRECT(PANDN, pandn, andc);
> +    LMI_DIRECT(OR, or, or);
> +
> +    case OPC_PINSRH_0:
> +        tcg_gen_deposit_i64(t0, t0, t1, 0, 16);
> +        opn = "pinsrh_0";
> +        break;
> +    case OPC_PINSRH_1:
> +        tcg_gen_deposit_i64(t0, t0, t1, 16, 16);
> +        opn = "pinsrh_1";
> +        break;
> +    case OPC_PINSRH_2:
> +        tcg_gen_deposit_i64(t0, t0, t1, 32, 16);
> +        opn = "pinsrh_2";
> +        break;
> +    case OPC_PINSRH_3:
> +        tcg_gen_deposit_i64(t0, t0, t1, 48, 16);
> +        opn = "pinsrh_3";
> +        break;
> +
> +    case OPC_PEXTRH:
> +        tcg_gen_andi_i64(t1, t1, 3);
> +        tcg_gen_shli_i64(t1, t1, 4);
> +        tcg_gen_shr_i64(t0, t0, t1);
> +        tcg_gen_ext16u_i64(t0, t0);
> +        opn = "pextrh";
> +        break;
> +
> +    case OPC_ADDU_CP2:
> +        tcg_gen_add_i64(t0, t0, t1);
> +        tcg_gen_ext32s_i64(t0, t0);
> +        opn = "addu";
> +        break;
> +    case OPC_SUBU_CP2:
> +        tcg_gen_sub_i64(t0, t0, t1);
> +        tcg_gen_ext32s_i64(t0, t0);
> +        opn = "addu";
> +        break;
> +
> +    case OPC_SLL_CP2:
> +        opn = "sll";
> +        shift_max = 32;
> +        goto do_shift;
> +    case OPC_SRL_CP2:
> +        opn = "srl";
> +        shift_max = 32;
> +        goto do_shift;
> +    case OPC_SRA_CP2:
> +        opn = "sra";
> +        shift_max = 32;
> +        goto do_shift;
> +    case OPC_DSLL_CP2:
> +        opn = "dsll";
> +        shift_max = 64;
> +        goto do_shift;
> +    case OPC_DSRL_CP2:
> +        opn = "dsrl";
> +        shift_max = 64;
> +        goto do_shift;
> +    case OPC_DSRA_CP2:
> +        opn = "dsra";
> +        shift_max = 64;
> +        goto do_shift;
> +    do_shift:
> +        switch (opc) {
> +        case OPC_SLL_CP2:
> +        case OPC_DSLL_CP2:
> +            tcg_gen_shl_i64(t0, t0, t1);
> +            break;
> +        case OPC_SRA_CP2:
> +        case OPC_DSRA_CP2:
> +            /* Since SRA is UndefinedResult without sign-extended inputs,
> +               we can treat SRA and DSRA the same.  */
> +            tcg_gen_sar_i64(t0, t0, t1);
> +            break;
> +        case OPC_SRL_CP2:
> +            /* We want to shift in zeros for SRL; zero-extend first.  */
> +            tcg_gen_ext32u_i64(t0, t0);
> +            /* FALLTHRU */
> +        case OPC_DSRL_CP2:
> +            tcg_gen_shr_i64(t0, t0, t1);
> +            break;
> +        }

You probably want to AND t1 with 0x3f, to make sure not to have a shift
larger than 64.

> +        if (shift_max == 32) {
> +            tcg_gen_ext32s_i64(t0, t0);
> +        }
> +
> +        /* Shifts larger than MAX produce zero.  */
> +        tcg_gen_setcondi_i64(TCG_COND_LTU, t1, t1, shift_max);
> +        tcg_gen_neg_i64(t1, t1);

I guess you want tcg_gen_subi_i64(t1, t1, 1);

> +        tcg_gen_and_i64(t0, t0, t1);
> +        break;
> +
> +    case OPC_ADD_CP2:
> +    case OPC_DADD_CP2:
> +        {
> +            /* Since ADD is UndefinedResult without sign-extended inputs,
> +               we can treat both ADD and DADD the same.  */

I don't think this is correct. For ADD, the result has to be
sign-extended. Also, the exception condition is not the same for ADD and DADD.

> +            TCGv_i64 t2 = tcg_temp_new_i64();
> +            int lab = gen_new_label();
> +
> +            tcg_gen_mov_i64(t2, t0);
> +            tcg_gen_add_i64(t0, t1, t2);
> +            tcg_gen_xor_i64(t1, t1, t2);
> +            tcg_gen_xor_i64(t2, t2, t0);
> +            tcg_gen_andc_i64(t1, t2, t1);
> +            tcg_temp_free_i64(t2);
> +            tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab);
> +            generate_exception(ctx, EXCP_OVERFLOW);
> +            gen_set_label(lab);
> +
> +            opn = (opc == OPC_ADD_CP2 ? "add" : "dadd");
> +            break;
> +        }

I don't think this is correct here.

> +
> +    case OPC_SUB_CP2:
> +    case OPC_DSUB_CP2:
> +        {
> +            /* Since SUB is UndefinedResult without sign-extended inputs,
> +               we can treat both SUB and DSUB the same.  */

Ditto.

> +            TCGv_i64 t2 = tcg_temp_new_i64();
> +            int lab = gen_new_label();
> +
> +            tcg_gen_mov_i64(t2, t0);
> +            tcg_gen_sub_i64(t0, t1, t2);
> +            tcg_gen_xor_i64(t1, t1, t2);
> +            tcg_gen_xor_i64(t2, t2, t0);
> +            tcg_gen_and_i64(t1, t1, t2);
> +            tcg_temp_free_i64(t2);
> +            tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab);
> +            generate_exception(ctx, EXCP_OVERFLOW);
> +            gen_set_label(lab);
> +
> +            opn = (opc == OPC_SUB_CP2 ? "sub" : "dsub");
> +            break;
> +        }
> +
> +    case OPC_PMULUW:
> +        tcg_gen_ext32u_i64(t0, t0);
> +        tcg_gen_ext32u_i64(t1, t1);
> +        tcg_gen_mul_i64(t0, t0, t1);
> +        opn = "pmuluw";
> +        break;
> +
> +    case OPC_SEQU_CP2:
> +    case OPC_SEQ_CP2:
> +    case OPC_SLTU_CP2:
> +    case OPC_SLT_CP2:
> +    case OPC_SLEU_CP2:
> +    case OPC_SLE_CP2:
> +        /* ??? Document is unclear: Set FCC[CC].  Does that mean the
> +           FD field is the CC field?  */
> +    default:
> +        MIPS_INVAL(opn);
> +        generate_exception (ctx, EXCP_RI);
> +        return;
> +    }
> +
> +#undef LMI_HELPER
> +#undef LMI_DIRECT
> +
> +    gen_store_fpr64(ctx, t0, rd);
> +
> +    (void)opn; /* avoid a compiler warning */
> +    MIPS_DEBUG("%s %s, %s, %s", opn,
> +               fregnames[rd], fregnames[rs], fregnames[rt]);
>      tcg_temp_free(t0);
>      tcg_temp_free(t1);
>  }
> @@ -12278,10 +12642,14 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch)
>      case OPC_LDC2:
>      case OPC_SWC2:
>      case OPC_SDC2:
> -    case OPC_CP2:
>          /* COP2: Not implemented. */
>          generate_exception_err(ctx, EXCP_CpU, 2);
>          break;
> +    case OPC_CP2:
> +        check_insn(env, ctx, INSN_LOONGSON2F);
> +        /* Note that these instructions use different fields.  */
> +        gen_loongson_multimedia(ctx, sa, rd, rt);
> +        break;
>  
>      case OPC_CP3:
>          if (env->CP0_Config1 & (1 << CP0C1_FP)) {
> -- 
> 1.7.7.6
> 
> 
> 

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurelien@aurel32.net                 http://www.aurel32.net

  reply	other threads:[~2012-09-08  0:32 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-03-30 17:13 [Qemu-devel] [PATCH v2] target-mips: Implement Loongson Multimedia Instructions Richard Henderson
2012-09-08  0:32 ` Aurelien Jarno [this message]
2012-09-17 18:54   ` Richard Henderson
2012-09-17 19:03     ` Aurelien Jarno

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20120908003216.GL6791@ohm.aurel32.net \
    --to=aurelien@aurel32.net \
    --cc=proljc@gmail.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.