From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([208.118.235.92]:34056) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TEQIs-0005OT-0o for qemu-devel@nongnu.org; Wed, 19 Sep 2012 15:51:25 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1TEQIo-0005EJ-79 for qemu-devel@nongnu.org; Wed, 19 Sep 2012 15:51:21 -0400 Received: from hall.aurel32.net ([88.191.126.93]:42562) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1TEQIn-0005C6-Jq for qemu-devel@nongnu.org; Wed, 19 Sep 2012 15:51:18 -0400 Date: Wed, 19 Sep 2012 21:51:13 +0200 From: Aurelien Jarno Message-ID: <20120919195113.GA11479@ohm.aurel32.net> References: <1348030784-14178-1-git-send-email-rth@twiddle.net> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-15 Content-Disposition: inline In-Reply-To: <1348030784-14178-1-git-send-email-rth@twiddle.net> Subject: Re: [Qemu-devel] [PATCH] target-mips: Implement Loongson Multimedia Instructions List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Richard Henderson Cc: qemu-devel@nongnu.org On Tue, Sep 18, 2012 at 09:59:44PM -0700, Richard Henderson wrote: > Implements all of the COP2 instructions except for the S > family of comparisons. The documentation is unclear for those. > > Signed-off-by: Richard Henderson > --- > > It turns out that the previous patch was totally independent of the > other 6 patches in the previous series. 
That said, I've pulled the > patch out to its own branch: > > git://repo.or.cz/qemu/rth.git rth/mips/lmi > > > r~ > > > target-mips/Makefile.objs | 2 +- > target-mips/helper.h | 59 ++++ > target-mips/lmi_helper.c | 744 ++++++++++++++++++++++++++++++++++++++++++++++ > target-mips/translate.c | 379 ++++++++++++++++++++++- > 4 files changed, 1180 insertions(+), 4 deletions(-) > create mode 100644 target-mips/lmi_helper.c > > diff --git a/target-mips/Makefile.objs b/target-mips/Makefile.objs > index ca20f21..3eeeeac 100644 > --- a/target-mips/Makefile.objs > +++ b/target-mips/Makefile.objs > @@ -1,2 +1,2 @@ > -obj-y += translate.o op_helper.o helper.o cpu.o > +obj-y += translate.o op_helper.o lmi_helper.o helper.o cpu.o > obj-$(CONFIG_SOFTMMU) += machine.o > diff --git a/target-mips/helper.h b/target-mips/helper.h > index 109ac37..f35ed78 100644 > --- a/target-mips/helper.h > +++ b/target-mips/helper.h > @@ -303,4 +303,63 @@ DEF_HELPER_1(rdhwr_ccres, tl, env) > DEF_HELPER_2(pmon, void, env, int) > DEF_HELPER_1(wait, void, env) > > +/* Loongson multimedia functions. 
*/ > +DEF_HELPER_FLAGS_2(paddsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(paddush, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(paddh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(paddw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(paddsb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(paddusb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(paddb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > + > +DEF_HELPER_FLAGS_2(psubsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(psubush, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(psubh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(psubw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(psubsb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(psubusb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(psubb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > + > +DEF_HELPER_FLAGS_2(pshufh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(packsswh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(packsshb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(packushb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > + > +DEF_HELPER_FLAGS_2(punpcklhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(punpckhhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(punpcklbh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(punpckhbh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(punpcklwd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(punpckhwd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > + > +DEF_HELPER_FLAGS_2(pavgh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pavgb, 
TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pmaxsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pminsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pmaxub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pminub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > + > +DEF_HELPER_FLAGS_2(pcmpeqw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pcmpgtw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pcmpeqh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pcmpgth, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pcmpeqb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pcmpgtb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > + > +DEF_HELPER_FLAGS_2(psllw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(psllh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(psrlw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(psrlh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(psraw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(psrah, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > + > +DEF_HELPER_FLAGS_2(pmullh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pmulhh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pmulhuh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_2(pmaddhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > + > +DEF_HELPER_FLAGS_2(pasubub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64) > +DEF_HELPER_FLAGS_1(biadd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) > +DEF_HELPER_FLAGS_1(pmovmskb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64) > + > #include "def-helper.h" > diff --git a/target-mips/lmi_helper.c b/target-mips/lmi_helper.c > new file mode 100644 > index 0000000..1b24353 > --- /dev/null > +++ 
b/target-mips/lmi_helper.c > @@ -0,0 +1,744 @@ > +/* > + * Loongson Multimedia Instruction emulation helpers for QEMU. > + * > + * Copyright (c) 2011 Richard Henderson > + * > + * This library is free software; you can redistribute it and/or > + * modify it under the terms of the GNU Lesser General Public > + * License as published by the Free Software Foundation; either > + * version 2 of the License, or (at your option) any later version. > + * > + * This library is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * Lesser General Public License for more details. > + * > + * You should have received a copy of the GNU Lesser General Public > + * License along with this library; if not, see <http://www.gnu.org/licenses/>. > + */ > + > +#include "cpu.h" > +#include "helper.h" > + > +/* If the byte ordering doesn't matter, i.e. all columns are treated > + identically, then this union can be used directly. If byte ordering > + does matter, we generally ignore dumping to memory. */ > +typedef union { > + uint8_t ub[8]; > + int8_t sb[8]; > + uint16_t uh[4]; > + int16_t sh[4]; > + uint32_t uw[2]; > + int32_t sw[2]; > + uint64_t d; > +} LMIValue; > + > +/* Some byte ordering issues can be mitigated by XORing in the following. */ > +#ifdef HOST_WORDS_BIGENDIAN > +# define BYTE_ORDER_XOR(N) N > +#else > +# define BYTE_ORDER_XOR(N) 0 > +#endif > + > +#define SATSB(x) (x < -0x80 ? -0x80 : x > 0x7f ? 0x7f : x) > +#define SATUB(x) (x > 0xff ? 0xff : x) > + > +#define SATSH(x) (x < -0x8000 ? -0x8000 : x > 0x7fff ? 0x7fff : x) > +#define SATUH(x) (x > 0xffff ? 0xffff : x) > + > +#define SATSW(x) \ > + (x < -0x80000000ll ? -0x80000000ll : x > 0x7fffffff ? 0x7fffffff : x) > +#define SATUW(x) (x > 0xffffffffull ?
0xffffffffull : x) > + > +uint64_t helper_paddsb(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 8; ++i) { > + int r = vs.sb[i] + vt.sb[i]; > + vs.sb[i] = SATSB(r); > + } > + return vs.d; > +} > + > +uint64_t helper_paddusb(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 8; ++i) { > + int r = vs.ub[i] + vt.ub[i]; > + vs.ub[i] = SATUB(r); > + } > + return vs.d; > +} > + > +uint64_t helper_paddsh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; ++i) { > + int r = vs.sh[i] + vt.sh[i]; > + vs.sh[i] = SATSH(r); > + } > + return vs.d; > +} > + > +uint64_t helper_paddush(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; ++i) { > + int r = vs.uh[i] + vt.uh[i]; > + vs.uh[i] = SATUH(r); > + } > + return vs.d; > +} > + > +uint64_t helper_paddb(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 8; ++i) { > + vs.ub[i] += vt.ub[i]; > + } > + return vs.d; > +} > + > +uint64_t helper_paddh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; ++i) { > + vs.uh[i] += vt.uh[i]; > + } > + return vs.d; > +} > + > +uint64_t helper_paddw(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 2; ++i) { > + vs.uw[i] += vt.uw[i]; > + } > + return vs.d; > +} > + > +uint64_t helper_psubsb(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 8; ++i) { > + int r = vs.sb[i] - vt.sb[i]; > + vs.sb[i] = SATSB(r); > + } > + return vs.d; > +} > + > +uint64_t helper_psubusb(uint64_t fs, uint64_t ft) > +{ 
> + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 8; ++i) { > + int r = vs.ub[i] - vt.ub[i]; > + vs.ub[i] = SATUB(r); > + } > + return vs.d; > +} > + > +uint64_t helper_psubsh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; ++i) { > + int r = vs.sh[i] - vt.sh[i]; > + vs.sh[i] = SATSH(r); > + } > + return vs.d; > +} > + > +uint64_t helper_psubush(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; ++i) { > + int r = vs.uh[i] - vt.uh[i]; > + vs.uh[i] = SATUH(r); > + } > + return vs.d; > +} > + > +uint64_t helper_psubb(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 8; ++i) { > + vs.ub[i] -= vt.ub[i]; > + } > + return vs.d; > +} > + > +uint64_t helper_psubh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; ++i) { > + vs.uh[i] -= vt.uh[i]; > + } > + return vs.d; > +} > + > +uint64_t helper_psubw(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned int i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 2; ++i) { > + vs.uw[i] -= vt.uw[i]; > + } > + return vs.d; > +} > + > +uint64_t helper_pshufh(uint64_t fs, uint64_t ft) > +{ > + unsigned host = BYTE_ORDER_XOR(3); > + LMIValue vd, vs; > + unsigned i; > + > + vs.d = fs; > + vd.d = 0; > + for (i = 0; i < 4; i++, ft >>= 2) { > + vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host]; > + } > + return vd.d; > +} > + > +uint64_t helper_packsswh(uint64_t fs, uint64_t ft) > +{ > + uint64_t fd = 0; > + int64_t tmp; > + > + tmp = (int32_t)(fs >> 0); > + tmp = SATSH(tmp); > + fd |= (tmp & 0xffff) << 0; > + > + tmp = (int32_t)(fs >> 32); > + tmp = SATSH(tmp); > + fd |= (tmp & 0xffff) << 16; > + > + tmp = (int32_t)(ft >> 0); > + tmp = SATSH(tmp); > + fd |= (tmp & 0xffff) << 
32; > + > + tmp = (int32_t)(ft >> 32); > + tmp = SATSH(tmp); > + fd |= (tmp & 0xffff) << 48; > + > + return fd; > +} > + > +uint64_t helper_packsshb(uint64_t fs, uint64_t ft) > +{ > + uint64_t fd = 0; > + unsigned int i; > + > + for (i = 0; i < 4; ++i) { > + int16_t tmp = fs >> (i * 16); > + tmp = SATSB(tmp); > + fd |= (uint64_t)(tmp & 0xff) << (i * 8); > + } > + for (i = 0; i < 4; ++i) { > + int16_t tmp = ft >> (i * 16); > + tmp = SATSB(tmp); > + fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32); > + } > + > + return fd; > +} > + > +uint64_t helper_packushb(uint64_t fs, uint64_t ft) > +{ > + uint64_t fd = 0; > + unsigned int i; > + > + for (i = 0; i < 4; ++i) { > + int16_t tmp = fs >> (i * 16); > + tmp = SATUB(tmp); > + fd |= (uint64_t)(tmp & 0xff) << (i * 8); > + } > + for (i = 0; i < 4; ++i) { > + int16_t tmp = ft >> (i * 16); > + tmp = SATUB(tmp); > + fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32); > + } > + > + return fd; > +} > + > +uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft) > +{ > + return (fs & 0xffffffff) | (ft << 32); > +} > + > +uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft) > +{ > + return (fs >> 32) | (ft & ~0xffffffffull); > +} > + > +uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft) > +{ > + unsigned host = BYTE_ORDER_XOR(3); > + LMIValue vd, vs, vt; > + > + vs.d = fs; > + vt.d = ft; > + vd.uh[0 ^ host] = vs.uh[0 ^ host]; > + vd.uh[1 ^ host] = vt.uh[0 ^ host]; > + vd.uh[2 ^ host] = vs.uh[1 ^ host]; > + vd.uh[3 ^ host] = vt.uh[1 ^ host]; > + > + return vd.d; > +} > + > +uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft) > +{ > + unsigned host = BYTE_ORDER_XOR(3); > + LMIValue vd, vs, vt; > + > + vs.d = fs; > + vt.d = ft; > + vd.uh[0 ^ host] = vs.uh[2 ^ host]; > + vd.uh[1 ^ host] = vt.uh[2 ^ host]; > + vd.uh[2 ^ host] = vs.uh[3 ^ host]; > + vd.uh[3 ^ host] = vt.uh[3 ^ host]; > + > + return vd.d; > +} > + > +uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft) > +{ > + unsigned host = BYTE_ORDER_XOR(7); > + LMIValue vd, vs, vt; > + > + vs.d = 
fs; > + vt.d = ft; > + vd.ub[0 ^ host] = vs.ub[0 ^ host]; > + vd.ub[1 ^ host] = vt.ub[0 ^ host]; > + vd.ub[2 ^ host] = vs.ub[1 ^ host]; > + vd.ub[3 ^ host] = vt.ub[1 ^ host]; > + vd.ub[4 ^ host] = vs.ub[2 ^ host]; > + vd.ub[5 ^ host] = vt.ub[2 ^ host]; > + vd.ub[6 ^ host] = vs.ub[3 ^ host]; > + vd.ub[7 ^ host] = vt.ub[3 ^ host]; > + > + return vd.d; > +} > + > +uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft) > +{ > + unsigned host = BYTE_ORDER_XOR(7); > + LMIValue vd, vs, vt; > + > + vs.d = fs; > + vt.d = ft; > + vd.ub[0 ^ host] = vs.ub[4 ^ host]; > + vd.ub[1 ^ host] = vt.ub[4 ^ host]; > + vd.ub[2 ^ host] = vs.ub[5 ^ host]; > + vd.ub[3 ^ host] = vt.ub[5 ^ host]; > + vd.ub[4 ^ host] = vs.ub[6 ^ host]; > + vd.ub[5 ^ host] = vt.ub[6 ^ host]; > + vd.ub[6 ^ host] = vs.ub[7 ^ host]; > + vd.ub[7 ^ host] = vt.ub[7 ^ host]; > + > + return vd.d; > +} > + > +uint64_t helper_pavgh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; i++) { > + vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1; > + } > + return vs.d; > +} > + > +uint64_t helper_pavgb(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 8; i++) { > + vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1; > + } > + return vs.d; > +} > + > +uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; i++) { > + vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]); > + } > + return vs.d; > +} > + > +uint64_t helper_pminsh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; i++) { > + vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? 
vs.sh[i] : vt.sh[i]); > + } > + return vs.d; > +} > + > +uint64_t helper_pmaxub(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; i++) { > + vs.ub[i] = (vs.ub[i] >= vt.ub[i] ? vs.ub[i] : vt.ub[i]); > + } > + return vs.d; > +} > + > +uint64_t helper_pminub(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; i++) { > + vs.ub[i] = (vs.ub[i] <= vt.ub[i] ? vs.ub[i] : vt.ub[i]); > + } > + return vs.d; > +} > + > +uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 2; i++) { > + vs.uw[i] = -(vs.uw[i] == vt.uw[i]); > + } > + return vs.d; > +} > + > +uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 2; i++) { > + vs.uw[i] = -(vs.uw[i] > vt.uw[i]); > + } > + return vs.d; > +} > + > +uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; i++) { > + vs.uh[i] = -(vs.uh[i] == vt.uh[i]); > + } > + return vs.d; > +} > + > +uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; i++) { > + vs.uh[i] = -(vs.uh[i] > vt.uh[i]); > + } > + return vs.d; > +} > + > +uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 8; i++) { > + vs.ub[i] = -(vs.ub[i] == vt.ub[i]); > + } > + return vs.d; > +} > + > +uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 8; i++) { > + vs.ub[i] = -(vs.ub[i] > vt.ub[i]); > + } > + return vs.d; > +} > + > +uint64_t helper_psllw(uint64_t fs, uint64_t ft) > +{ > + 
LMIValue vs; > + unsigned i; > + > + ft &= 0x7f; > + if (ft > 31) { > + return 0; > + } > + vs.d = fs; > + for (i = 0; i < 2; ++i) { > + vs.uw[i] <<= ft; > + } > + return vs.d; > +} > + > +uint64_t helper_psrlw(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs; > + unsigned i; > + > + ft &= 0x7f; > + if (ft > 31) { > + return 0; > + } > + vs.d = fs; > + for (i = 0; i < 2; ++i) { > + vs.uw[i] >>= ft; > + } > + return vs.d; > +} > + > +uint64_t helper_psraw(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs; > + unsigned i; > + > + ft &= 0x7f; > + if (ft > 31) { > + ft = 31; > + } > + vs.d = fs; > + for (i = 0; i < 2; ++i) { > + vs.sw[i] >>= ft; > + } > + return vs.d; > +} > + > +uint64_t helper_psllh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs; > + unsigned i; > + > + ft &= 0x7f; > + if (ft > 15) { > + return 0; > + } > + vs.d = fs; > + for (i = 0; i < 4; ++i) { > + vs.uh[i] <<= ft; > + } > + return vs.d; > +} > + > +uint64_t helper_psrlh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs; > + unsigned i; > + > + ft &= 0x7f; > + if (ft > 15) { > + return 0; > + } > + vs.d = fs; > + for (i = 0; i < 4; ++i) { > + vs.uh[i] >>= ft; > + } > + return vs.d; > +} > + > +uint64_t helper_psrah(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs; > + unsigned i; > + > + ft &= 0x7f; > + if (ft > 15) { > + ft = 15; > + } > + vs.d = fs; > + for (i = 0; i < 4; ++i) { > + vs.sh[i] >>= ft; > + } > + return vs.d; > +} > + > +uint64_t helper_pmullh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; ++i) { > + vs.sh[i] *= vt.sh[i]; > + } > + return vs.d; > +} > + > +uint64_t helper_pmulhh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 4; ++i) { > + int32_t r = vs.sh[i] * vt.sh[i]; > + vs.sh[i] = r >> 16; > + } > + return vs.d; > +} > + > +uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + 
vt.d = ft; > + for (i = 0; i < 4; ++i) { > + uint32_t r = vs.uh[i] * vt.uh[i]; > + vs.uh[i] = r >> 16; > + } > + return vs.d; > +} > + > +uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft) > +{ > + unsigned host = BYTE_ORDER_XOR(3); > + LMIValue vs, vt; > + uint32_t p0, p1; > + > + vs.d = fs; > + vt.d = ft; > + p0 = vs.sh[0 ^ host] * vt.sh[0 ^ host]; > + p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host]; > + p1 = vs.sh[2 ^ host] * vt.sh[2 ^ host]; > + p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host]; > + > + return ((uint64_t)p1 << 32) | p0; > +} > + > +uint64_t helper_pasubub(uint64_t fs, uint64_t ft) > +{ > + LMIValue vs, vt; > + unsigned i; > + > + vs.d = fs; > + vt.d = ft; > + for (i = 0; i < 8; ++i) { > + int r = vs.ub[i] - vt.ub[i]; > + vs.ub[i] = (r < 0 ? -r : r); > + } > + return vs.d; > +} > + > +uint64_t helper_biadd(uint64_t fs) > +{ > + unsigned i, fd; > + > + for (i = fd = 0; i < 8; ++i) { > + fd += (fs >> (i * 8)) & 0xff; > + } > + return fd & 0xffff; > +} > + > +uint64_t helper_pmovmskb(uint64_t fs) > +{ > + unsigned fd = 0; > + > + fd |= ((fs >> 7) & 1) << 0; > + fd |= ((fs >> 15) & 1) << 1; > + fd |= ((fs >> 23) & 1) << 2; > + fd |= ((fs >> 31) & 1) << 3; > + fd |= ((fs >> 39) & 1) << 4; > + fd |= ((fs >> 47) & 1) << 5; > + fd |= ((fs >> 55) & 1) << 6; > + fd |= ((fs >> 63) & 1) << 7; > + > + return fd & 0xff; > +} > diff --git a/target-mips/translate.c b/target-mips/translate.c > index 52eeb2b..f61cc6e 100644 > --- a/target-mips/translate.c > +++ b/target-mips/translate.c > @@ -446,6 +446,103 @@ enum { > OPC_BC2 = (0x08 << 21) | OPC_CP2, > }; > > +#define MASK_LMI(op) (MASK_OP_MAJOR(op) | (op & (0x1F << 21)) | (op & 0x1F)) > + > +enum { > + OPC_PADDSH = (24 << 21) | (0x00) | OPC_CP2, > + OPC_PADDUSH = (25 << 21) | (0x00) | OPC_CP2, > + OPC_PADDH = (26 << 21) | (0x00) | OPC_CP2, > + OPC_PADDW = (27 << 21) | (0x00) | OPC_CP2, > + OPC_PADDSB = (28 << 21) | (0x00) | OPC_CP2, > + OPC_PADDUSB = (29 << 21) | (0x00) | OPC_CP2, > + OPC_PADDB = (30 << 21) | (0x00) | OPC_CP2, > 
+ OPC_PADDD = (31 << 21) | (0x00) | OPC_CP2, > + > + OPC_PSUBSH = (24 << 21) | (0x01) | OPC_CP2, > + OPC_PSUBUSH = (25 << 21) | (0x01) | OPC_CP2, > + OPC_PSUBH = (26 << 21) | (0x01) | OPC_CP2, > + OPC_PSUBW = (27 << 21) | (0x01) | OPC_CP2, > + OPC_PSUBSB = (28 << 21) | (0x01) | OPC_CP2, > + OPC_PSUBUSB = (29 << 21) | (0x01) | OPC_CP2, > + OPC_PSUBB = (30 << 21) | (0x01) | OPC_CP2, > + OPC_PSUBD = (31 << 21) | (0x01) | OPC_CP2, > + > + OPC_PSHUFH = (24 << 21) | (0x02) | OPC_CP2, > + OPC_PACKSSWH = (25 << 21) | (0x02) | OPC_CP2, > + OPC_PACKSSHB = (26 << 21) | (0x02) | OPC_CP2, > + OPC_PACKUSHB = (27 << 21) | (0x02) | OPC_CP2, > + OPC_XOR_CP2 = (28 << 21) | (0x02) | OPC_CP2, > + OPC_NOR_CP2 = (29 << 21) | (0x02) | OPC_CP2, > + OPC_AND_CP2 = (30 << 21) | (0x02) | OPC_CP2, > + OPC_PANDN = (31 << 21) | (0x02) | OPC_CP2, > + > + OPC_PUNPCKLHW = (24 << 21) | (0x03) | OPC_CP2, > + OPC_PUNPCKHHW = (25 << 21) | (0x03) | OPC_CP2, > + OPC_PUNPCKLBH = (26 << 21) | (0x03) | OPC_CP2, > + OPC_PUNPCKHBH = (27 << 21) | (0x03) | OPC_CP2, > + OPC_PINSRH_0 = (28 << 21) | (0x03) | OPC_CP2, > + OPC_PINSRH_1 = (29 << 21) | (0x03) | OPC_CP2, > + OPC_PINSRH_2 = (30 << 21) | (0x03) | OPC_CP2, > + OPC_PINSRH_3 = (31 << 21) | (0x03) | OPC_CP2, > + > + OPC_PAVGH = (24 << 21) | (0x08) | OPC_CP2, > + OPC_PAVGB = (25 << 21) | (0x08) | OPC_CP2, > + OPC_PMAXSH = (26 << 21) | (0x08) | OPC_CP2, > + OPC_PMINSH = (27 << 21) | (0x08) | OPC_CP2, > + OPC_PMAXUB = (28 << 21) | (0x08) | OPC_CP2, > + OPC_PMINUB = (29 << 21) | (0x08) | OPC_CP2, > + > + OPC_PCMPEQW = (24 << 21) | (0x09) | OPC_CP2, > + OPC_PCMPGTW = (25 << 21) | (0x09) | OPC_CP2, > + OPC_PCMPEQH = (26 << 21) | (0x09) | OPC_CP2, > + OPC_PCMPGTH = (27 << 21) | (0x09) | OPC_CP2, > + OPC_PCMPEQB = (28 << 21) | (0x09) | OPC_CP2, > + OPC_PCMPGTB = (29 << 21) | (0x09) | OPC_CP2, > + > + OPC_PSLLW = (24 << 21) | (0x0A) | OPC_CP2, > + OPC_PSLLH = (25 << 21) | (0x0A) | OPC_CP2, > + OPC_PMULLH = (26 << 21) | (0x0A) | OPC_CP2, > + OPC_PMULHH = (27 << 21) | 
(0x0A) | OPC_CP2, > + OPC_PMULUW = (28 << 21) | (0x0A) | OPC_CP2, > + OPC_PMULHUH = (29 << 21) | (0x0A) | OPC_CP2, > + > + OPC_PSRLW = (24 << 21) | (0x0B) | OPC_CP2, > + OPC_PSRLH = (25 << 21) | (0x0B) | OPC_CP2, > + OPC_PSRAW = (26 << 21) | (0x0B) | OPC_CP2, > + OPC_PSRAH = (27 << 21) | (0x0B) | OPC_CP2, > + OPC_PUNPCKLWD = (28 << 21) | (0x0B) | OPC_CP2, > + OPC_PUNPCKHWD = (29 << 21) | (0x0B) | OPC_CP2, > + > + OPC_ADDU_CP2 = (24 << 21) | (0x0C) | OPC_CP2, > + OPC_OR_CP2 = (25 << 21) | (0x0C) | OPC_CP2, > + OPC_ADD_CP2 = (26 << 21) | (0x0C) | OPC_CP2, > + OPC_DADD_CP2 = (27 << 21) | (0x0C) | OPC_CP2, > + OPC_SEQU_CP2 = (28 << 21) | (0x0C) | OPC_CP2, > + OPC_SEQ_CP2 = (29 << 21) | (0x0C) | OPC_CP2, > + > + OPC_SUBU_CP2 = (24 << 21) | (0x0D) | OPC_CP2, > + OPC_PASUBUB = (25 << 21) | (0x0D) | OPC_CP2, > + OPC_SUB_CP2 = (26 << 21) | (0x0D) | OPC_CP2, > + OPC_DSUB_CP2 = (27 << 21) | (0x0D) | OPC_CP2, > + OPC_SLTU_CP2 = (28 << 21) | (0x0D) | OPC_CP2, > + OPC_SLT_CP2 = (29 << 21) | (0x0D) | OPC_CP2, > + > + OPC_SLL_CP2 = (24 << 21) | (0x0E) | OPC_CP2, > + OPC_DSLL_CP2 = (25 << 21) | (0x0E) | OPC_CP2, > + OPC_PEXTRH = (26 << 21) | (0x0E) | OPC_CP2, > + OPC_PMADDHW = (27 << 21) | (0x0E) | OPC_CP2, > + OPC_SLEU_CP2 = (28 << 21) | (0x0E) | OPC_CP2, > + OPC_SLE_CP2 = (29 << 21) | (0x0E) | OPC_CP2, > + > + OPC_SRL_CP2 = (24 << 21) | (0x0F) | OPC_CP2, > + OPC_DSRL_CP2 = (25 << 21) | (0x0F) | OPC_CP2, > + OPC_SRA_CP2 = (26 << 21) | (0x0F) | OPC_CP2, > + OPC_DSRA_CP2 = (27 << 21) | (0x0F) | OPC_CP2, > + OPC_BIADD = (28 << 21) | (0x0F) | OPC_CP2, > + OPC_PMOVMSKB = (29 << 21) | (0x0F) | OPC_CP2, > +}; > + > + > #define MASK_CP3(op) MASK_OP_MAJOR(op) | (op & 0x3F) > > enum { > @@ -2380,8 +2477,8 @@ static void gen_cl (DisasContext *ctx, uint32_t opc, > } > > /* Godson integer instructions */ > -static void gen_loongson_integer (DisasContext *ctx, uint32_t opc, > - int rd, int rs, int rt) > +static void gen_loongson_integer(DisasContext *ctx, uint32_t opc, > + int rd, int rs, int 
rt) > { > const char *opn = "loongson"; > TCGv t0, t1; > @@ -2594,6 +2691,278 @@ static void gen_loongson_integer (DisasContext *ctx, uint32_t opc, > tcg_temp_free(t1); > } > > +/* Loongson multimedia instructions */ > +static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt) > +{ > + const char *opn = "loongson_cp2"; > + uint32_t opc, shift_max; > + TCGv_i64 t0, t1; > + > + opc = MASK_LMI(ctx->opcode); > + switch (opc) { > + case OPC_ADD_CP2: > + case OPC_SUB_CP2: > + case OPC_DADD_CP2: > + case OPC_DSUB_CP2: > + t0 = tcg_temp_local_new_i64(); > + t1 = tcg_temp_local_new_i64(); > + break; > + default: > + t0 = tcg_temp_new_i64(); > + t1 = tcg_temp_new_i64(); > + break; > + } > + > + gen_load_fpr64(ctx, t0, rs); > + gen_load_fpr64(ctx, t1, rt); > + > +#define LMI_HELPER(UP, LO) \ > + case OPC_##UP: gen_helper_##LO(t0, t0, t1); opn = #LO; break > +#define LMI_HELPER_1(UP, LO) \ > + case OPC_##UP: gen_helper_##LO(t0, t0); opn = #LO; break > +#define LMI_DIRECT(UP, LO, OP) \ > + case OPC_##UP: tcg_gen_##OP##_i64(t0, t0, t1); opn = #LO; break > + > + switch (opc) { > + LMI_HELPER(PADDSH, paddsh); > + LMI_HELPER(PADDUSH, paddush); > + LMI_HELPER(PADDH, paddh); > + LMI_HELPER(PADDW, paddw); > + LMI_HELPER(PADDSB, paddsb); > + LMI_HELPER(PADDUSB, paddusb); > + LMI_HELPER(PADDB, paddb); > + > + LMI_HELPER(PSUBSH, psubsh); > + LMI_HELPER(PSUBUSH, psubush); > + LMI_HELPER(PSUBH, psubh); > + LMI_HELPER(PSUBW, psubw); > + LMI_HELPER(PSUBSB, psubsb); > + LMI_HELPER(PSUBUSB, psubusb); > + LMI_HELPER(PSUBB, psubb); > + > + LMI_HELPER(PSHUFH, pshufh); > + LMI_HELPER(PACKSSWH, packsswh); > + LMI_HELPER(PACKSSHB, packsshb); > + LMI_HELPER(PACKUSHB, packushb); > + > + LMI_HELPER(PUNPCKLHW, punpcklhw); > + LMI_HELPER(PUNPCKHHW, punpckhhw); > + LMI_HELPER(PUNPCKLBH, punpcklbh); > + LMI_HELPER(PUNPCKHBH, punpckhbh); > + LMI_HELPER(PUNPCKLWD, punpcklwd); > + LMI_HELPER(PUNPCKHWD, punpckhwd); > + > + LMI_HELPER(PAVGH, pavgh); > + LMI_HELPER(PAVGB, pavgb); > + 
LMI_HELPER(PMAXSH, pmaxsh); > + LMI_HELPER(PMINSH, pminsh); > + LMI_HELPER(PMAXUB, pmaxub); > + LMI_HELPER(PMINUB, pminub); > + > + LMI_HELPER(PCMPEQW, pcmpeqw); > + LMI_HELPER(PCMPGTW, pcmpgtw); > + LMI_HELPER(PCMPEQH, pcmpeqh); > + LMI_HELPER(PCMPGTH, pcmpgth); > + LMI_HELPER(PCMPEQB, pcmpeqb); > + LMI_HELPER(PCMPGTB, pcmpgtb); > + > + LMI_HELPER(PSLLW, psllw); > + LMI_HELPER(PSLLH, psllh); > + LMI_HELPER(PSRLW, psrlw); > + LMI_HELPER(PSRLH, psrlh); > + LMI_HELPER(PSRAW, psraw); > + LMI_HELPER(PSRAH, psrah); > + > + LMI_HELPER(PMULLH, pmullh); > + LMI_HELPER(PMULHH, pmulhh); > + LMI_HELPER(PMULHUH, pmulhuh); > + LMI_HELPER(PMADDHW, pmaddhw); > + > + LMI_HELPER(PASUBUB, pasubub); > + LMI_HELPER_1(BIADD, biadd); > + LMI_HELPER_1(PMOVMSKB, pmovmskb); > + > + LMI_DIRECT(PADDD, paddd, add); > + LMI_DIRECT(PSUBD, psubd, sub); > + LMI_DIRECT(XOR_CP2, xor, xor); > + LMI_DIRECT(NOR_CP2, nor, nor); > + LMI_DIRECT(AND_CP2, and, and); > + LMI_DIRECT(PANDN, pandn, andc); > + LMI_DIRECT(OR, or, or); > + > + case OPC_PINSRH_0: > + tcg_gen_deposit_i64(t0, t0, t1, 0, 16); > + opn = "pinsrh_0"; > + break; > + case OPC_PINSRH_1: > + tcg_gen_deposit_i64(t0, t0, t1, 16, 16); > + opn = "pinsrh_1"; > + break; > + case OPC_PINSRH_2: > + tcg_gen_deposit_i64(t0, t0, t1, 32, 16); > + opn = "pinsrh_2"; > + break; > + case OPC_PINSRH_3: > + tcg_gen_deposit_i64(t0, t0, t1, 48, 16); > + opn = "pinsrh_3"; > + break; > + > + case OPC_PEXTRH: > + tcg_gen_andi_i64(t1, t1, 3); > + tcg_gen_shli_i64(t1, t1, 4); > + tcg_gen_shr_i64(t0, t0, t1); > + tcg_gen_ext16u_i64(t0, t0); > + opn = "pextrh"; > + break; > + > + case OPC_ADDU_CP2: > + tcg_gen_add_i64(t0, t0, t1); > + tcg_gen_ext32s_i64(t0, t0); > + opn = "addu"; > + break; > + case OPC_SUBU_CP2: > + tcg_gen_sub_i64(t0, t0, t1); > + tcg_gen_ext32s_i64(t0, t0); > + opn = "addu"; > + break; > + > + case OPC_SLL_CP2: > + opn = "sll"; > + shift_max = 32; > + goto do_shift; > + case OPC_SRL_CP2: > + opn = "srl"; > + shift_max = 32; > + goto do_shift; > + 
case OPC_SRA_CP2: > + opn = "sra"; > + shift_max = 32; > + goto do_shift; > + case OPC_DSLL_CP2: > + opn = "dsll"; > + shift_max = 64; > + goto do_shift; > + case OPC_DSRL_CP2: > + opn = "dsrl"; > + shift_max = 64; > + goto do_shift; > + case OPC_DSRA_CP2: > + opn = "dsra"; > + shift_max = 64; > + goto do_shift; > + do_shift: > + /* Make sure shift count isn't TCG undefined behaviour. */ > + tcg_gen_andi_i64(t1, t1, shift_max - 1); > + > + switch (opc) { > + case OPC_SLL_CP2: > + case OPC_DSLL_CP2: > + tcg_gen_shl_i64(t0, t0, t1); > + break; > + case OPC_SRA_CP2: > + case OPC_DSRA_CP2: > + /* Since SRA is UndefinedResult without sign-extended inputs, > + we can treat SRA and DSRA the same. */ > + tcg_gen_sar_i64(t0, t0, t1); > + break; > + case OPC_SRL_CP2: > + /* We want to shift in zeros for SRL; zero-extend first. */ > + tcg_gen_ext32u_i64(t0, t0); > + /* FALLTHRU */ > + case OPC_DSRL_CP2: > + tcg_gen_shr_i64(t0, t0, t1); > + break; > + } > + > + if (shift_max == 32) { > + tcg_gen_ext32s_i64(t0, t0); > + } > + > + /* Shifts larger than MAX produce zero. */ > + tcg_gen_setcondi_i64(TCG_COND_LTU, t1, t1, shift_max); > + tcg_gen_neg_i64(t1, t1); > + tcg_gen_and_i64(t0, t0, t1); > + break; > + > + case OPC_ADD_CP2: > + case OPC_DADD_CP2: > + { > + TCGv_i64 t2 = tcg_temp_new_i64(); > + int lab = gen_new_label(); > + > + tcg_gen_mov_i64(t2, t0); > + tcg_gen_add_i64(t0, t1, t2); > + if (opc == OPC_ADD_CP2) { > + tcg_gen_ext32s_i64(t0, t0); > + } > + tcg_gen_xor_i64(t1, t1, t2); > + tcg_gen_xor_i64(t2, t2, t0); > + tcg_gen_andc_i64(t1, t2, t1); > + tcg_temp_free_i64(t2); > + tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab); > + generate_exception(ctx, EXCP_OVERFLOW); > + gen_set_label(lab); > + > + opn = (opc == OPC_ADD_CP2 ? 
"add" : "dadd"); > + break; > + } > + > + case OPC_SUB_CP2: > + case OPC_DSUB_CP2: > + { > + TCGv_i64 t2 = tcg_temp_new_i64(); > + int lab = gen_new_label(); > + > + tcg_gen_mov_i64(t2, t0); > + tcg_gen_sub_i64(t0, t1, t2); > + if (opc == OPC_SUB_CP2) { > + tcg_gen_ext32s_i64(t0, t0); > + } > + tcg_gen_xor_i64(t1, t1, t2); > + tcg_gen_xor_i64(t2, t2, t0); > + tcg_gen_and_i64(t1, t1, t2); > + tcg_temp_free_i64(t2); > + tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab); > + generate_exception(ctx, EXCP_OVERFLOW); > + gen_set_label(lab); > + > + opn = (opc == OPC_SUB_CP2 ? "sub" : "dsub"); > + break; > + } > + > + case OPC_PMULUW: > + tcg_gen_ext32u_i64(t0, t0); > + tcg_gen_ext32u_i64(t1, t1); > + tcg_gen_mul_i64(t0, t0, t1); > + opn = "pmuluw"; > + break; > + > + case OPC_SEQU_CP2: > + case OPC_SEQ_CP2: > + case OPC_SLTU_CP2: > + case OPC_SLT_CP2: > + case OPC_SLEU_CP2: > + case OPC_SLE_CP2: > + /* ??? Document is unclear: Set FCC[CC]. Does that mean the > + FD field is the CC field? */ > + default: > + MIPS_INVAL(opn); > + generate_exception(ctx, EXCP_RI); > + return; > + } > + > +#undef LMI_HELPER > +#undef LMI_DIRECT > + > + gen_store_fpr64(ctx, t0, rd); > + > + (void)opn; /* avoid a compiler warning */ > + MIPS_DEBUG("%s %s, %s, %s", opn, > + fregnames[rd], fregnames[rs], fregnames[rt]); > + tcg_temp_free_i64(t0); > + tcg_temp_free_i64(t1); > +} > + > /* Traps */ > static void gen_trap (DisasContext *ctx, uint32_t opc, > int rs, int rt, int16_t imm) > @@ -12316,10 +12685,14 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx, int *is_branch) > case OPC_LDC2: > case OPC_SWC2: > case OPC_SDC2: > - case OPC_CP2: > /* COP2: Not implemented. */ > generate_exception_err(ctx, EXCP_CpU, 2); > break; > + case OPC_CP2: > + check_insn(env, ctx, INSN_LOONGSON2F); > + /* Note that these instructions use different fields. */ > + gen_loongson_multimedia(ctx, sa, rd, rt); > + break; > > case OPC_CP3: > if (env->CP0_Config1 & (1 << CP0C1_FP)) { Thanks, applied. 
-- Aurelien Jarno GPG: 1024D/F1BCDB73 aurelien@aurel32.net http://www.aurel32.net