[Qemu-devel] [PATCH 00/20] target-mips: add MSA module

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH 00/20] target-mips: add MSA module
@ 2014-07-14  9:55 Yongbok Kim
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 01/20] target-mips: add MSA defines and data structure Yongbok Kim
                   ` (19 more replies)
  0 siblings, 20 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

The following patchset implements the MIPS SIMD Architecture (MSA) module.
MSA adds new instructions to the MIPS architecture that allow efficient
parallel processing of vector operations.

For more information refer to:
MIPS Architecture Reference Manual
Volume IV-j: The MIPS32 SIMD Architecture Module
The document (MD00867) is available at:
http://www.imgtec.com/mips/architectures/simd.asp

Note that the LSA instruction is not included in this patchset, as
Release 6 implements it.

This patchset is on top of the patch:
[PATCH v2] target-mips: fix broken MIPS16 and microMIPS
http://patchwork.ozlabs.org/patch/366146/

MSA floating-point is compliant with the IEEE Standard for Floating-Point
Arithmetic (IEEE 754-2008). However, this patchset does not enable the
IEEE 754-2008 option, as QEMU softfloat for MIPS has not been updated yet.

Yongbok Kim (20):
  target-mips: add MSA defines and data structure
  target-mips: add MSA exceptions
  target-mips: move common funcs to cpu.h
  target-mips: add 8, 16, 32, 64 bits load and store
  target-mips: stop translation after ctc1
  target-mips: add MSA opcode enum
  target-mips: add msa_reset(), global msa register
  target-mips: add msa_helper.c
  target-mips: add MSA branch instructions
  target-mips: add MSA I8 format instructions
  target-mips: add MSA I5 format instructions
  target-mips: add MSA BIT format instructions
  target-mips: add MSA 3R format instructions
  target-mips: add MSA ELM format instructions
  target-mips: add MSA 3RF format instructions
  target-mips: add MSA VEC/2R format instructions
  target-mips: add MSA 2RF format instructions
  target-mips: add MSA MI10 format instructions
  disas/mips.c: disassemble MSA instructions
  target-mips: add MSA support to mips32r5-generic

 disas/mips.c                 |  721 ++++++-
 target-mips/Makefile.objs    |    2 +-
 target-mips/cpu.h            |  159 ++-
 target-mips/gdbstub.c        |    7 -
 target-mips/helper.c         |    8 +
 target-mips/helper.h         |  177 ++
 target-mips/mips-defs.h      |    1 +
 target-mips/msa_helper.c     | 5423 ++++++++++++++++++++++++++++++++++++++++++
 target-mips/op_helper.c      |   61 +-
 target-mips/translate.c      | 1576 ++++++++++++-
 target-mips/translate_init.c |   49 +-
 11 files changed, 8107 insertions(+), 77 deletions(-)
 create mode 100644 target-mips/msa_helper.c

-- 
1.7.4

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 01/20] target-mips: add MSA defines and data structure
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-10-22 11:35   ` James Hogan
  2014-10-22 13:15   ` James Hogan
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 02/20] target-mips: add MSA exceptions Yongbok Kim
                   ` (18 subsequent siblings)
  19 siblings, 2 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add defines and data structure for MIPS SIMD Architecture

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/cpu.h       |   79 +++++++++++++++++++++++++++++++++++++++++++++--
 target-mips/mips-defs.h |    1 +
 target-mips/op_helper.c |    1 +
 3 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index c81dfac..9a6b77c 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -51,12 +51,74 @@ struct CPUMIPSTLBContext {
 };
 #endif
 
+/* MSA Context */
+
+#define MSA_WRLEN (128)
+
+typedef union wr_t wr_t;
+union wr_t {
+    int8_t  b[MSA_WRLEN/8];
+    int16_t h[MSA_WRLEN/16];
+    int32_t w[MSA_WRLEN/32];
+    int64_t d[MSA_WRLEN/64];
+};
+
+typedef struct CPUMIPSMSAContext CPUMIPSMSAContext;
+struct CPUMIPSMSAContext {
+
+#define MSAIR_REGISTER      0
+#define MSACSR_REGISTER     1
+#define MSAACCESS_REGISTER  2
+#define MSASAVE_REGISTER    3
+#define MSAMODIFY_REGISTER  4
+#define MSAREQUEST_REGISTER 5
+#define MSAMAP_REGISTER     6
+#define MSAUNMAP_REGISTER   7
+
+    int32_t msair;
+
+#define MSAIR_WRP_POS 16
+#define MSAIR_WRP_BIT (1 << MSAIR_WRP_POS)
+
+    int32_t msacsr;
+
+#define MSACSR_RM_POS   0
+#define MSACSR_RM_MASK  (0x3 << MSACSR_RM_POS)
+
+#define MSACSR_CAUSE_ENABLE_FLAGS_POS 2
+#define MSACSR_CAUSE_ENABLE_FLAGS_MASK \
+    (0xffff << MSACSR_CAUSE_ENABLE_FLAGS_POS)
+
+#define MSACSR_NX_POS 18
+#define MSACSR_NX_BIT (1 << MSACSR_NX_POS)
+
+#define MSACSR_FS_POS 24
+#define MSACSR_FS_BIT (1 << MSACSR_FS_POS)
+
+#define MSACSR_BITS                             \
+    (MSACSR_RM_MASK |                           \
+     MSACSR_CAUSE_ENABLE_FLAGS_MASK |           \
+     MSACSR_FS_BIT |                            \
+     MSACSR_NX_BIT)
+
+    int32_t msaaccess;
+    int32_t msasave;
+    int32_t msamodify;
+    int32_t msarequest;
+    int32_t msamap;
+    int32_t msaunmap;
+
+    float_status fp_status;
+};
+
 typedef union fpr_t fpr_t;
 union fpr_t {
     float64  fd;   /* ieee double precision */
     float32  fs[2];/* ieee single precision */
     uint64_t d;    /* binary double fixed-point */
     uint32_t w[2]; /* binary single fixed-point */
+/* FPU/MSA register mapping is not tested on big-endian hosts. */
+    wr_t     wr;   /* vector data */
 };
 /* define FP_ENDIAN_IDX to access the same location
  * in the fpr_t union regardless of the host endianness
@@ -175,6 +237,7 @@ typedef struct CPUMIPSState CPUMIPSState;
 struct CPUMIPSState {
     TCState active_tc;
     CPUMIPSFPUContext active_fpu;
+    CPUMIPSMSAContext active_msa;
 
     uint32_t current_tc;
     uint32_t current_fpu;
@@ -362,6 +425,7 @@ struct CPUMIPSState {
 #define CP0C2_SA   0
     int32_t CP0_Config3;
 #define CP0C3_M    31
+#define CP0C3_MSAP  28
 #define CP0C3_ISA_ON_EXC 16
 #define CP0C3_ULRI 13
 #define CP0C3_DSPP 10
@@ -431,7 +495,7 @@ struct CPUMIPSState {
     int error_code;
     uint32_t hflags;    /* CPU State */
     /* TMASK defines different execution modes */
-#define MIPS_HFLAG_TMASK  0x1807FF
+#define MIPS_HFLAG_TMASK  0x5807FF
 #define MIPS_HFLAG_MODE   0x00007 /* execution modes                    */
     /* The KSU flags must be the lowest bits in hflags. The flag order
        must be the same as defined for CP0 Status. This allows to use
@@ -475,6 +539,7 @@ struct CPUMIPSState {
 #define MIPS_HFLAG_DSPR2 0x100000  /* Enable access to MIPS DSPR2 resources. */
     /* Extra flag about HWREna register. */
 #define MIPS_HFLAG_HWRENA_ULR 0x200000 /* ULR bit from HWREna is set. */
+#define MIPS_HFLAG_MSA   0x400000
     target_ulong btarget;        /* Jump / branch target               */
     target_ulong bcond;          /* Branch condition (if needed)       */
 
@@ -628,8 +693,10 @@ enum {
     EXCP_C2E,
     EXCP_CACHE, /* 32 */
     EXCP_DSPDIS,
+    EXCP_MSADIS,
+    EXCP_MSAFPE,
 
-    EXCP_LAST = EXCP_DSPDIS,
+    EXCP_LAST = EXCP_MSAFPE,
 };
 /* Dummy exception for conditional stores.  */
 #define EXCP_SC 0x100
@@ -726,7 +793,8 @@ static inline void compute_hflags(CPUMIPSState *env)
 {
     env->hflags &= ~(MIPS_HFLAG_COP1X | MIPS_HFLAG_64 | MIPS_HFLAG_CP0 |
                      MIPS_HFLAG_F64 | MIPS_HFLAG_FPU | MIPS_HFLAG_KSU |
-                     MIPS_HFLAG_UX | MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2);
+                     MIPS_HFLAG_UX | MIPS_HFLAG_DSP | MIPS_HFLAG_DSPR2 |
+                     MIPS_HFLAG_MSA);
     if (!(env->CP0_Status & (1 << CP0St_EXL)) &&
         !(env->CP0_Status & (1 << CP0St_ERL)) &&
         !(env->hflags & MIPS_HFLAG_DM)) {
@@ -784,6 +852,11 @@ static inline void compute_hflags(CPUMIPSState *env)
             env->hflags |= MIPS_HFLAG_COP1X;
         }
     }
+    if (env->insn_flags & ASE_MSA) {
+        if (env->CP0_Config5 & (1 << CP0C5_MSAEn)) {
+            env->hflags |= MIPS_HFLAG_MSA;
+        }
+    }
 }
 
 #endif /* !defined (__MIPS_CPU_H__) */
diff --git a/target-mips/mips-defs.h b/target-mips/mips-defs.h
index 9dfa516..11722bb 100644
--- a/target-mips/mips-defs.h
+++ b/target-mips/mips-defs.h
@@ -41,6 +41,7 @@
 #define		ASE_MT		0x00020000
 #define		ASE_SMARTMIPS	0x00040000
 #define 	ASE_MICROMIPS	0x00080000
+#define     ASE_MSA     0x00100000
 
 /* Chip specific instructions. */
 #define		INSN_LOONGSON2E  0x20000000
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 27651a4..75f8af8 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -1512,6 +1512,7 @@ void helper_mtc0_config5(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Config5 = (env->CP0_Config5 & (~env->CP0_Config5_rw_bitmask)) |
                        (arg1 & env->CP0_Config5_rw_bitmask);
+    compute_hflags(env);
 }
 
 void helper_mtc0_lladdr(CPUMIPSState *env, target_ulong arg1)
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 01/20] target-mips: add MSA defines and data structure
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 01/20] target-mips: add MSA defines and data structure Yongbok Kim
@ 2014-10-22 11:35   ` James Hogan
  2014-10-24  9:35     ` Yongbok Kim
  2014-10-22 13:15   ` James Hogan
  1 sibling, 1 reply; 35+ messages in thread
From: James Hogan @ 2014-10-22 11:35 UTC (permalink / raw)
  To: Yongbok Kim, qemu-devel; +Cc: cristian.cuna, leon.alrae, aurelien

Hi,

On 14/07/14 10:55, Yongbok Kim wrote:
> +union wr_t {
> +    int8_t  b[MSA_WRLEN/8];
> +    int16_t h[MSA_WRLEN/16];
> +    int32_t w[MSA_WRLEN/32];
> +    int64_t d[MSA_WRLEN/64];

This is incorrect on a big endian host. The least significant bits of
the lowest indexed element should always alias.

With a compiler for little endian this will work fine since b[0] will
alias the least significant bits of h[0], w[0], and d[0], whereas with a
compiler for big endian, b[0] will alias the upper byte of h[0], w[0],
and d[0].

> diff --git a/target-mips/mips-defs.h b/target-mips/mips-defs.h
> index 9dfa516..11722bb 100644
> --- a/target-mips/mips-defs.h
> +++ b/target-mips/mips-defs.h
> @@ -41,6 +41,7 @@
>  #define		ASE_MT		0x00020000
>  #define		ASE_SMARTMIPS	0x00040000
>  #define 	ASE_MICROMIPS	0x00080000
> +#define     ASE_MSA     0x00100000

inconsistent whitespace... though maybe it was already incorrect.

Cheers
James

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 01/20] target-mips: add MSA defines and data structure
  2014-10-22 11:35   ` James Hogan
@ 2014-10-24  9:35     ` Yongbok Kim
  2014-10-24 12:57       ` Leon Alrae
  0 siblings, 1 reply; 35+ messages in thread
From: Yongbok Kim @ 2014-10-24  9:35 UTC (permalink / raw)
  To: James Hogan, qemu-devel; +Cc: cristian.cuna, leon.alrae, aurelien

Hi,

On 22/10/2014 12:35, James Hogan wrote:
> +union wr_t {
> +    int8_t  b[MSA_WRLEN/8];
> +    int16_t h[MSA_WRLEN/16];
> +    int32_t w[MSA_WRLEN/32];
> +    int64_t d[MSA_WRLEN/64];
> This is incorrect on a big endian host. The least significant bits of
> the lowest indexed element should always alias.
>
> With a compiler for little endian this will work fine since b[0] will
> alias the least significant bits of h[0], w[0], and d[0], whereas with a
> compiler for big endian, b[0] will alias the upper byte of h[0], w[0],
> and d[0].

Yes, it wouldn't work on a big-endian host.
However, this MSA feature has been fully verified for big- and
little-endian targets on a little-endian host.
I can see that dsp_helper.c has a similar problem as well.
MSA could be forcibly turned off on a big-endian host, or it might be left
as it is, just like DSP.
If we need to implement support for big-endian hosts, it would take longer...

What do you guys think about that?

Regards,
Yongbok

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 01/20] target-mips: add MSA defines and data structure
  2014-10-24  9:35     ` Yongbok Kim
@ 2014-10-24 12:57       ` Leon Alrae
  0 siblings, 0 replies; 35+ messages in thread
From: Leon Alrae @ 2014-10-24 12:57 UTC (permalink / raw)
  To: Yongbok Kim, James Hogan, qemu-devel; +Cc: cristian.cuna, aurelien

On 24/10/2014 10:35, Yongbok Kim wrote:
> Hi,
> 
> On 22/10/2014 12:35, James Hogan wrote:
>> +union wr_t {
>> +    int8_t  b[MSA_WRLEN/8];
>> +    int16_t h[MSA_WRLEN/16];
>> +    int32_t w[MSA_WRLEN/32];
>> +    int64_t d[MSA_WRLEN/64];
>> This is incorrect on a big endian host. The least significant bits of
>> the lowest indexed element should always alias.
>>
>> With a compiler for little endian this will work fine since b[0] will
>> alias the least significant bits of h[0], w[0], and d[0], whereas with a
>> compiler for big endian, b[0] will alias the upper byte of h[0], w[0],
>> and d[0].
> 
> Yes it wouldn't work for a big endian host.
> However this MSA feature has been fully verified for big and little
> endian targets on a little endian host.
> I can see that the dsp_helper.c has similar problem as well.
> MSA could be forcibly turned off in a big endian host or might be leaved
> as it is just like DSP.
> If we need to implement for big endian host it would take longer time...
> 
> What do you guys think about that?

I don't think it would be reasonable to reject these patches only
because of the big-endian host limitation, as these patches already
contain a huge amount of work (moreover, MSA is quite isolated). However,
this version of the patchset has other issues pointed out by me and James
in previous emails - they need to be fixed / cleaned up.

Thanks,
Leon

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 01/20] target-mips: add MSA defines and data structure
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 01/20] target-mips: add MSA defines and data structure Yongbok Kim
  2014-10-22 11:35   ` James Hogan
@ 2014-10-22 13:15   ` James Hogan
  1 sibling, 0 replies; 35+ messages in thread
From: James Hogan @ 2014-10-22 13:15 UTC (permalink / raw)
  To: Yongbok Kim, qemu-devel; +Cc: cristian.cuna, leon.alrae, aurelien

Hi,

On 14/07/14 10:55, Yongbok Kim wrote:
> +typedef struct CPUMIPSMSAContext CPUMIPSMSAContext;
> +struct CPUMIPSMSAContext {

> +    int32_t msair;

> +    int32_t msacsr;

> +    int32_t msaaccess;
> +    int32_t msasave;
> +    int32_t msamodify;
> +    int32_t msarequest;
> +    int32_t msamap;
> +    int32_t msaunmap;
> +
> +    float_status fp_status;
> +};
> +
>  typedef union fpr_t fpr_t;
>  union fpr_t {
>      float64  fd;   /* ieee double precision */
>      float32  fs[2];/* ieee single precision */
>      uint64_t d;    /* binary double fixed-point */
>      uint32_t w[2]; /* binary single fixed-point */
> +/* FPU/MSA register mapping is not tested on big-endian hosts. */
> +    wr_t     wr;   /* vector data */
>  };
>  /* define FP_ENDIAN_IDX to access the same location
>   * in the fpr_t union regardless of the host endianness
> @@ -175,6 +237,7 @@ typedef struct CPUMIPSState CPUMIPSState;
>  struct CPUMIPSState {
>      TCState active_tc;
>      CPUMIPSFPUContext active_fpu;
> +    CPUMIPSMSAContext active_msa;

According to the manual, only the msair register is shared between
thread contexts; each thread context has its own version of the rest of
the msa registers, so most of this should be in TCState, I think.

Cheers
James

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 02/20] target-mips: add MSA exceptions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 01/20] target-mips: add MSA defines and data structure Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 03/20] target-mips: move common funcs to cpu.h Yongbok Kim
                   ` (17 subsequent siblings)
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA exceptions

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/target-mips/helper.c b/target-mips/helper.c
index 8a997e4..ed796ff 100644
--- a/target-mips/helper.c
+++ b/target-mips/helper.c
@@ -396,6 +396,8 @@ static const char * const excp_names[EXCP_LAST + 1] = {
     [EXCP_MDMX] = "MDMX",
     [EXCP_C2E] = "precise coprocessor 2",
     [EXCP_CACHE] = "cache error",
+    [EXCP_MSADIS] = "MSA disabled",
+    [EXCP_MSAFPE] = "MSA floating point",
 };
 
 target_ulong exception_resume_pc (CPUMIPSState *env)
@@ -608,12 +610,18 @@ void mips_cpu_do_interrupt(CPUState *cs)
     case EXCP_TRAP:
         cause = 13;
         goto set_EPC;
+    case EXCP_MSAFPE:
+        cause = 14;
+        goto set_EPC;
     case EXCP_FPE:
         cause = 15;
         goto set_EPC;
     case EXCP_C2E:
         cause = 18;
         goto set_EPC;
+    case EXCP_MSADIS:
+        cause = 21;
+        goto set_EPC;
     case EXCP_MDMX:
         cause = 22;
         goto set_EPC;
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 03/20] target-mips: move common funcs to cpu.h
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 01/20] target-mips: add MSA defines and data structure Yongbok Kim
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 02/20] target-mips: add MSA exceptions Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-10-10  9:22   ` Leon Alrae
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 04/20] target-mips: add 8, 16, 32, 64 bits load and store Yongbok Kim
                   ` (16 subsequent siblings)
  19 siblings, 1 reply; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

move commonly used functions to cpu.h

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/cpu.h       |   72 +++++++++++++++++++++++++++++++++++++++++++++++
 target-mips/gdbstub.c   |    7 ----
 target-mips/op_helper.c |   60 +-------------------------------------
 3 files changed, 74 insertions(+), 65 deletions(-)

diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 9a6b77c..68ce383 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -747,6 +747,10 @@ hwaddr cpu_mips_translate_address (CPUMIPSState *env, target_ulong address,
 #endif
 target_ulong exception_resume_pc (CPUMIPSState *env);
 
+/* op_helper.c */
+extern unsigned int ieee_rm[];
+int ieee_ex_to_mips(int xcpt);
+
 static inline void cpu_get_tb_cpu_state(CPUMIPSState *env, target_ulong *pc,
                                         target_ulong *cs_base, int *flags)
 {
@@ -859,4 +863,72 @@ static inline void compute_hflags(CPUMIPSState *env)
     }
 }
 
+#include "exec/cpu_ldst.h"
+
+#if defined(CONFIG_USER_ONLY)
+#define HELPER_LD(name, insn, type)                                     \
+static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             int mem_idx)                               \
+{                                                                       \
+    return (type) insn##_raw(addr);                                     \
+}
+#else
+#define HELPER_LD(name, insn, type)                                     \
+static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             int mem_idx)                               \
+{                                                                       \
+    switch (mem_idx) {                                                  \
+    case 0:                                                             \
+        return (type) cpu_##insn##_kernel(env, addr);                   \
+        break;                                                          \
+    case 1:                                                             \
+        return (type) cpu_##insn##_super(env, addr);                    \
+        break;                                                          \
+    default:                                                            \
+    case 2:                                                             \
+        return (type) cpu_##insn##_user(env, addr);                     \
+        break;                                                          \
+    }                                                                   \
+}
+#endif
+HELPER_LD(lbu, ldub, uint8_t)
+HELPER_LD(lw, ldl, int32_t)
+#ifdef TARGET_MIPS64
+HELPER_LD(ld, ldq, int64_t)
+#endif
+#undef HELPER_LD
+
+#if defined(CONFIG_USER_ONLY)
+#define HELPER_ST(name, insn, type)                                     \
+static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             type val, int mem_idx)                     \
+{                                                                       \
+    insn##_raw(addr, val);                                              \
+}
+#else
+#define HELPER_ST(name, insn, type)                                     \
+static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             type val, int mem_idx)                     \
+{                                                                       \
+    switch (mem_idx) {                                                  \
+    case 0:                                                             \
+        cpu_##insn##_kernel(env, addr, val);                            \
+        break;                                                          \
+    case 1:                                                             \
+        cpu_##insn##_super(env, addr, val);                             \
+        break;                                                          \
+    default:                                                            \
+    case 2:                                                             \
+        cpu_##insn##_user(env, addr, val);                              \
+        break;                                                          \
+    }                                                                   \
+}
+#endif
+HELPER_ST(sb, stb, uint8_t)
+HELPER_ST(sw, stl, uint32_t)
+#ifdef TARGET_MIPS64
+HELPER_ST(sd, stq, uint64_t)
+#endif
+#undef HELPER_ST
+
 #endif /* !defined (__MIPS_CPU_H__) */
diff --git a/target-mips/gdbstub.c b/target-mips/gdbstub.c
index 5b72d58..f65fec2 100644
--- a/target-mips/gdbstub.c
+++ b/target-mips/gdbstub.c
@@ -73,13 +73,6 @@ int mips_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n)
     return 0;
 }
 
-/* convert MIPS rounding mode in FCR31 to IEEE library */
-static unsigned int ieee_rm[] = {
-    float_round_nearest_even,
-    float_round_to_zero,
-    float_round_up,
-    float_round_down
-};
 #define RESTORE_ROUNDING_MODE \
     set_float_rounding_mode(ieee_rm[env->active_fpu.fcr31 & 3], \
                             &env->active_fpu.fp_status)
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 75f8af8..b47e2c8 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -68,62 +68,6 @@ void helper_raise_exception(CPUMIPSState *env, uint32_t exception)
     do_raise_exception(env, exception, 0);
 }
 
-#if defined(CONFIG_USER_ONLY)
-#define HELPER_LD(name, insn, type)                                     \
-static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
-                             int mem_idx)                               \
-{                                                                       \
-    return (type) insn##_raw(addr);                                     \
-}
-#else
-#define HELPER_LD(name, insn, type)                                     \
-static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
-                             int mem_idx)                               \
-{                                                                       \
-    switch (mem_idx)                                                    \
-    {                                                                   \
-    case 0: return (type) cpu_##insn##_kernel(env, addr); break;        \
-    case 1: return (type) cpu_##insn##_super(env, addr); break;         \
-    default:                                                            \
-    case 2: return (type) cpu_##insn##_user(env, addr); break;          \
-    }                                                                   \
-}
-#endif
-HELPER_LD(lbu, ldub, uint8_t)
-HELPER_LD(lw, ldl, int32_t)
-#ifdef TARGET_MIPS64
-HELPER_LD(ld, ldq, int64_t)
-#endif
-#undef HELPER_LD
-
-#if defined(CONFIG_USER_ONLY)
-#define HELPER_ST(name, insn, type)                                     \
-static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
-                             type val, int mem_idx)                     \
-{                                                                       \
-    insn##_raw(addr, val);                                              \
-}
-#else
-#define HELPER_ST(name, insn, type)                                     \
-static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
-                             type val, int mem_idx)                     \
-{                                                                       \
-    switch (mem_idx)                                                    \
-    {                                                                   \
-    case 0: cpu_##insn##_kernel(env, addr, val); break;                 \
-    case 1: cpu_##insn##_super(env, addr, val); break;                  \
-    default:                                                            \
-    case 2: cpu_##insn##_user(env, addr, val); break;                   \
-    }                                                                   \
-}
-#endif
-HELPER_ST(sb, stb, uint8_t)
-HELPER_ST(sw, stl, uint32_t)
-#ifdef TARGET_MIPS64
-HELPER_ST(sd, stq, uint64_t)
-#endif
-#undef HELPER_ST
-
 target_ulong helper_clo (target_ulong arg1)
 {
     return clo32(arg1);
@@ -2185,7 +2129,7 @@ void mips_cpu_unassigned_access(CPUState *cs, hwaddr addr,
 #define FP_TO_INT64_OVERFLOW 0x7fffffffffffffffULL
 
 /* convert MIPS rounding mode in FCR31 to IEEE library */
-static unsigned int ieee_rm[] = {
+unsigned int ieee_rm[] = {
     float_round_nearest_even,
     float_round_to_zero,
     float_round_up,
@@ -2301,7 +2245,7 @@ void helper_ctc1(CPUMIPSState *env, target_ulong arg1, uint32_t fs, uint32_t rt)
         do_raise_exception(env, EXCP_FPE, GETPC());
 }
 
-static inline int ieee_ex_to_mips(int xcpt)
+int ieee_ex_to_mips(int xcpt)
 {
     int ret = 0;
     if (xcpt) {
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 03/20] target-mips: move common funcs to cpu.h
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 03/20] target-mips: move common funcs to cpu.h Yongbok Kim
@ 2014-10-10  9:22   ` Leon Alrae
  0 siblings, 0 replies; 35+ messages in thread
From: Leon Alrae @ 2014-10-10  9:22 UTC (permalink / raw)
  To: Yongbok Kim, qemu-devel; +Cc: cristian.cuna, aurelien

Hi Yongbok,

On 14/07/2014 10:55, Yongbok Kim wrote:
> +#include "exec/cpu_ldst.h"
> +
> +#if defined(CONFIG_USER_ONLY)
> +#define HELPER_LD(name, insn, type)                                     \
> +static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
> +                             int mem_idx)                               \
> +{                                                                       \
> +    return (type) insn##_raw(addr);                                     \
> +}
> +#else
> +#define HELPER_LD(name, insn, type)                                     \
> +static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
> +                             int mem_idx)                               \
> +{                                                                       \
> +    switch (mem_idx) {                                                  \
> +    case 0:                                                             \
> +        return (type) cpu_##insn##_kernel(env, addr);                   \
> +        break;                                                          \
> +    case 1:                                                             \
> +        return (type) cpu_##insn##_super(env, addr);                    \
> +        break;                                                          \
> +    default:                                                            \
> +    case 2:                                                             \
> +        return (type) cpu_##insn##_user(env, addr);                     \
> +        break;                                                          \
> +    }                                                                   \
> +}
> +#endif
> +HELPER_LD(lbu, ldub, uint8_t)
> +HELPER_LD(lw, ldl, int32_t)
> +#ifdef TARGET_MIPS64
> +HELPER_LD(ld, ldq, int64_t)
> +#endif
> +#undef HELPER_LD
> +
> +#if defined(CONFIG_USER_ONLY)
> +#define HELPER_ST(name, insn, type)                                     \
> +static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
> +                             type val, int mem_idx)                     \
> +{                                                                       \
> +    insn##_raw(addr, val);                                              \
> +}
> +#else
> +#define HELPER_ST(name, insn, type)                                     \
> +static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
> +                             type val, int mem_idx)                     \
> +{                                                                       \
> +    switch (mem_idx) {                                                  \
> +    case 0:                                                             \
> +        cpu_##insn##_kernel(env, addr, val);                            \
> +        break;                                                          \
> +    case 1:                                                             \
> +        cpu_##insn##_super(env, addr, val);                             \
> +        break;                                                          \
> +    default:                                                            \
> +    case 2:                                                             \
> +        cpu_##insn##_user(env, addr, val);                              \
> +        break;                                                          \
> +    }                                                                   \
> +}
> +#endif
> +HELPER_ST(sb, stb, uint8_t)
> +HELPER_ST(sw, stl, uint32_t)
> +#ifdef TARGET_MIPS64
> +HELPER_ST(sd, stq, uint64_t)
> +#endif
> +#undef HELPER_ST
> +

I'm not sure if moving this to cpu.h is a good idea - it won't be used
anywhere else than in op_helper.c and msa_helper.c (and probably these
static inlines will generate warnings in clang). Only msa_ld_df and
msa_st_df in msa_helper.c need them, thus in my opinion it will be
better just to move these 2 functions from msa_helper.c to op_helper.c.

Regards,
Leon

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 04/20] target-mips: add 8, 16, 32, 64 bits load and store
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (2 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 03/20] target-mips: move common funcs to cpu.h Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-10-10  9:26   ` Leon Alrae
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 05/20] target-mips: stop translation after ctc1 Yongbok Kim
                   ` (15 subsequent siblings)
  19 siblings, 1 reply; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add 8, 16, 32, 64 bits load and store

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/cpu.h |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 68ce383..51d1c88 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -896,6 +896,10 @@ HELPER_LD(lw, ldl, int32_t)
 #ifdef TARGET_MIPS64
 HELPER_LD(ld, ldq, int64_t)
 #endif
+HELPER_LD(ld8, ldub, uint8_t)
+HELPER_LD(ld16, lduw, uint16_t)
+HELPER_LD(ld32, ldl, int32_t)
+HELPER_LD(ld64, ldq, int64_t)
 #undef HELPER_LD
 
 #if defined(CONFIG_USER_ONLY)
@@ -929,6 +933,10 @@ HELPER_ST(sw, stl, uint32_t)
 #ifdef TARGET_MIPS64
 HELPER_ST(sd, stq, uint64_t)
 #endif
+HELPER_ST(st8, stb, uint8_t)
+HELPER_ST(st16, stw, uint16_t)
+HELPER_ST(st32, stl, int32_t)
+HELPER_ST(st64, stq, int64_t)
 #undef HELPER_ST
 
 #endif /* !defined (__MIPS_CPU_H__) */
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 04/20] target-mips: add 8, 16, 32, 64 bits load and store
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 04/20] target-mips: add 8, 16, 32, 64 bits load and store Yongbok Kim
@ 2014-10-10  9:26   ` Leon Alrae
  0 siblings, 0 replies; 35+ messages in thread
From: Leon Alrae @ 2014-10-10  9:26 UTC (permalink / raw)
  To: Yongbok Kim, qemu-devel; +Cc: cristian.cuna, aurelien

On 14/07/2014 10:55, Yongbok Kim wrote:
> add 8, 16, 32, 64 bits load and store
> 
> Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
> ---
>  target-mips/cpu.h |    8 ++++++++
>  1 files changed, 8 insertions(+), 0 deletions(-)
> 
> diff --git a/target-mips/cpu.h b/target-mips/cpu.h
> index 68ce383..51d1c88 100644
> --- a/target-mips/cpu.h
> +++ b/target-mips/cpu.h
> @@ -896,6 +896,10 @@ HELPER_LD(lw, ldl, int32_t)
>  #ifdef TARGET_MIPS64
>  HELPER_LD(ld, ldq, int64_t)
>  #endif
> +HELPER_LD(ld8, ldub, uint8_t)
> +HELPER_LD(ld16, lduw, uint16_t)
> +HELPER_LD(ld32, ldl, int32_t)
> +HELPER_LD(ld64, ldq, int64_t)
>  #undef HELPER_LD
>  
>  #if defined(CONFIG_USER_ONLY)
> @@ -929,6 +933,10 @@ HELPER_ST(sw, stl, uint32_t)
>  #ifdef TARGET_MIPS64
>  HELPER_ST(sd, stq, uint64_t)
>  #endif
> +HELPER_ST(st8, stb, uint8_t)
> +HELPER_ST(st16, stw, uint16_t)
> +HELPER_ST(st32, stl, int32_t)
> +HELPER_ST(st64, stq, int64_t)
>  #undef HELPER_ST
>  
>  #endif /* !defined (__MIPS_CPU_H__) */
> 

do_ld8, do_ld32 and do_ld64 are duplications of the existing do_lbu, do_lw
and do_ld. Similarly for do_st*.

Leon

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 05/20] target-mips: stop translation after ctc1
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (3 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 04/20] target-mips: add 8, 16, 32, 64 bits load and store Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 06/20] target-mips: add MSA opcode enum Yongbok Kim
                   ` (14 subsequent siblings)
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

stop translation as ctc1 instruction can change hflags

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/translate.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index 994e85d..cccbc44 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -6837,12 +6837,15 @@ static void gen_mttr(CPUMIPSState *env, DisasContext *ctx, int rd, int rt,
         break;
     case 3:
         /* XXX: For now we support only a single FPU context. */
+        save_cpu_state(ctx, 1);
         {
             TCGv_i32 fs_tmp = tcg_const_i32(rd);
 
             gen_helper_0e2i(ctc1, t0, fs_tmp, rt);
             tcg_temp_free_i32(fs_tmp);
         }
+        /* Stop translation as we may have changed hflags */
+        ctx->bstate = BS_STOP;
         break;
     /* COP2: Not implemented. */
     case 4:
@@ -7278,12 +7281,15 @@ static void gen_cp1 (DisasContext *ctx, uint32_t opc, int rt, int fs)
         break;
     case OPC_CTC1:
         gen_load_gpr(t0, rt);
+        save_cpu_state(ctx, 1);
         {
             TCGv_i32 fs_tmp = tcg_const_i32(fs);
 
             gen_helper_0e2i(ctc1, t0, fs_tmp, rt);
             tcg_temp_free_i32(fs_tmp);
         }
+        /* Stop translation as we may have changed hflags */
+        ctx->bstate = BS_STOP;
         opn = "ctc1";
         break;
 #if defined(TARGET_MIPS64)
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 06/20] target-mips: add MSA opcode enum
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (4 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 05/20] target-mips: stop translation after ctc1 Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-10-10  9:26   ` Leon Alrae
  2014-10-22 12:18   ` James Hogan
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 07/20] target-mips: add msa_reset(), global msa register Yongbok Kim
                   ` (13 subsequent siblings)
  19 siblings, 2 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA opcode enum

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/translate.c |  248 +++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 248 insertions(+), 0 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index cccbc44..6b4a82c 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -108,6 +108,8 @@ enum {
     OPC_SDC2     = (0x3E << 26),
     /* MDMX ASE specific */
     OPC_MDMX     = (0x1E << 26),
+    /* MSA ASE, same as MDMX */
+    OPC_MSA      = OPC_MDMX,
     /* Cache and prefetch */
     OPC_CACHE    = (0x2F << 26),
     OPC_PREF     = (0x33 << 26),
@@ -128,10 +130,12 @@ enum {
     OPC_ROTR     = OPC_SRL | (1 << 21),
     OPC_SRA      = 0x03 | OPC_SPECIAL,
     OPC_SLLV     = 0x04 | OPC_SPECIAL,
+    OPC_MSA_S05  = 0x05 | OPC_SPECIAL,
     OPC_SRLV     = 0x06 | OPC_SPECIAL, /* also ROTRV */
     OPC_ROTRV    = OPC_SRLV | (1 << 6),
     OPC_SRAV     = 0x07 | OPC_SPECIAL,
     OPC_DSLLV    = 0x14 | OPC_SPECIAL,
+    OPC_MSA_S15  = 0x15 | OPC_SPECIAL,
     OPC_DSRLV    = 0x16 | OPC_SPECIAL, /* also DROTRV */
     OPC_DROTRV   = OPC_DSRLV | (1 << 6),
     OPC_DSRAV    = 0x17 | OPC_SPECIAL,
@@ -835,6 +839,8 @@ enum {
     OPC_BC1      = (0x08 << 21) | OPC_CP1, /* bc */
     OPC_BC1ANY2  = (0x09 << 21) | OPC_CP1,
     OPC_BC1ANY4  = (0x0A << 21) | OPC_CP1,
+    OPC_MSA_BZ_V = (0x0B << 21) | OPC_CP1,
+    OPC_MSA_BNZ_V = (0x0F << 21) | OPC_CP1,
     OPC_S_FMT    = (FMT_S << 21) | OPC_CP1,
     OPC_D_FMT    = (FMT_D << 21) | OPC_CP1,
     OPC_E_FMT    = (FMT_E << 21) | OPC_CP1,
@@ -842,6 +848,14 @@ enum {
     OPC_W_FMT    = (FMT_W << 21) | OPC_CP1,
     OPC_L_FMT    = (FMT_L << 21) | OPC_CP1,
     OPC_PS_FMT   = (FMT_PS << 21) | OPC_CP1,
+    OPC_MSA_BZ_B = (0x18 << 21) | OPC_CP1,
+    OPC_MSA_BZ_H = (0x19 << 21) | OPC_CP1,
+    OPC_MSA_BZ_W = (0x1A << 21) | OPC_CP1,
+    OPC_MSA_BZ_D = (0x1B << 21) | OPC_CP1,
+    OPC_MSA_BNZ_B = (0x1C << 21) | OPC_CP1,
+    OPC_MSA_BNZ_H = (0x1D << 21) | OPC_CP1,
+    OPC_MSA_BNZ_W = (0x1E << 21) | OPC_CP1,
+    OPC_MSA_BNZ_D  = (0x1F << 21) | OPC_CP1,
 };
 
 #define MASK_CP1_FUNC(op)       MASK_CP1(op) | (op & 0x3F)
@@ -1000,6 +1014,240 @@ enum {
     OPC_NMSUB_PS= 0x3E | OPC_CP3,
 };
 
+/* MSA Opcodes */
+
+#define MASK_MSA_MINOR(op)    (MASK_OP_MAJOR(op) | (op & 0x3F))
+enum {
+    OPC_MSA_I8_00   = 0x00 | OPC_MSA,
+    OPC_MSA_I8_01   = 0x01 | OPC_MSA,
+    OPC_MSA_I8_02   = 0x02 | OPC_MSA,
+    OPC_MSA_I5_06   = 0x06 | OPC_MSA,
+    OPC_MSA_I5_07   = 0x07 | OPC_MSA,
+    OPC_MSA_BIT_09  = 0x09 | OPC_MSA,
+    OPC_MSA_BIT_0A  = 0x0A | OPC_MSA,
+    OPC_MSA_3R_0D   = 0x0D | OPC_MSA,
+    OPC_MSA_3R_0E   = 0x0E | OPC_MSA,
+    OPC_MSA_3R_0F   = 0x0F | OPC_MSA,
+    OPC_MSA_3R_10   = 0x10 | OPC_MSA,
+    OPC_MSA_3R_11   = 0x11 | OPC_MSA,
+    OPC_MSA_3R_12   = 0x12 | OPC_MSA,
+    OPC_MSA_3R_13   = 0x13 | OPC_MSA,
+    OPC_MSA_3R_14   = 0x14 | OPC_MSA,
+    OPC_MSA_3R_15   = 0x15 | OPC_MSA,
+    OPC_MSA_ELM     = 0x19 | OPC_MSA,
+    OPC_MSA_3RF_1A  = 0x1A | OPC_MSA,
+    OPC_MSA_3RF_1B  = 0x1B | OPC_MSA,
+    OPC_MSA_3RF_1C  = 0x1C | OPC_MSA,
+    OPC_MSA_VEC     = 0x1E | OPC_MSA,
+
+    /* MI10 instruction */
+    OPC_MSA_LD_B    = (0x20) | OPC_MSA,
+    OPC_MSA_LD_H    = (0x21) | OPC_MSA,
+    OPC_MSA_LD_W    = (0x22) | OPC_MSA,
+    OPC_MSA_LD_D    = (0x23) | OPC_MSA,
+    OPC_MSA_ST_B    = (0x24) | OPC_MSA,
+    OPC_MSA_ST_H    = (0x25) | OPC_MSA,
+    OPC_MSA_ST_W    = (0x26) | OPC_MSA,
+    OPC_MSA_ST_D    = (0x27) | OPC_MSA,
+};
+
+enum {
+    /* I5 instruction df(bits 22..21) = _b, _h, _w, _d */
+    OPC_MSA_ADDVI_df    = (0x0 << 23) | OPC_MSA_I5_06,
+    OPC_MSA_CEQI_df     = (0x0 << 23) | OPC_MSA_I5_07,
+    OPC_MSA_SUBVI_df    = (0x1 << 23) | OPC_MSA_I5_06,
+    OPC_MSA_MAXI_S_df   = (0x2 << 23) | OPC_MSA_I5_06,
+    OPC_MSA_CLTI_S_df   = (0x2 << 23) | OPC_MSA_I5_07,
+    OPC_MSA_MAXI_U_df   = (0x3 << 23) | OPC_MSA_I5_06,
+    OPC_MSA_CLTI_U_df   = (0x3 << 23) | OPC_MSA_I5_07,
+    OPC_MSA_MINI_S_df   = (0x4 << 23) | OPC_MSA_I5_06,
+    OPC_MSA_CLEI_S_df   = (0x4 << 23) | OPC_MSA_I5_07,
+    OPC_MSA_MINI_U_df   = (0x5 << 23) | OPC_MSA_I5_06,
+    OPC_MSA_CLEI_U_df   = (0x5 << 23) | OPC_MSA_I5_07,
+    OPC_MSA_LDI_df      = (0x6 << 23) | OPC_MSA_I5_07,
+
+    /* I8 instruction */
+    OPC_MSA_ANDI_B  = (0x0 << 24) | OPC_MSA_I8_00,
+    OPC_MSA_BMNZI_B = (0x0 << 24) | OPC_MSA_I8_01,
+    OPC_MSA_SHF_B   = (0x0 << 24) | OPC_MSA_I8_02,
+    OPC_MSA_ORI_B   = (0x1 << 24) | OPC_MSA_I8_00,
+    OPC_MSA_BMZI_B  = (0x1 << 24) | OPC_MSA_I8_01,
+    OPC_MSA_SHF_H   = (0x1 << 24) | OPC_MSA_I8_02,
+    OPC_MSA_NORI_B  = (0x2 << 24) | OPC_MSA_I8_00,
+    OPC_MSA_BSELI_B = (0x2 << 24) | OPC_MSA_I8_01,
+    OPC_MSA_SHF_W   = (0x2 << 24) | OPC_MSA_I8_02,
+    OPC_MSA_XORI_B  = (0x3 << 24) | OPC_MSA_I8_00,
+
+    /* VEC/2R/2RF instruction */
+    OPC_MSA_AND_V   = (0x00 << 21) | OPC_MSA_VEC,
+    OPC_MSA_OR_V    = (0x01 << 21) | OPC_MSA_VEC,
+    OPC_MSA_NOR_V   = (0x02 << 21) | OPC_MSA_VEC,
+    OPC_MSA_XOR_V   = (0x03 << 21) | OPC_MSA_VEC,
+    OPC_MSA_BMNZ_V  = (0x04 << 21) | OPC_MSA_VEC,
+    OPC_MSA_BMZ_V   = (0x05 << 21) | OPC_MSA_VEC,
+    OPC_MSA_BSEL_V  = (0x06 << 21) | OPC_MSA_VEC,
+
+    OPC_MSA_2R      = (0x18 << 21) | OPC_MSA_VEC,
+    OPC_MSA_2RF     = (0x19 << 21) | OPC_MSA_VEC,
+
+    /* 2R instruction df(bits 17..16) = _b, _h, _w, _d */
+    OPC_MSA_FILL_df = (0x00 << 16) | OPC_MSA_2R,
+    OPC_MSA_PCNT_df = (0x04 << 16) | OPC_MSA_2R,
+    OPC_MSA_NLOC_df = (0x08 << 16) | OPC_MSA_2R,
+    OPC_MSA_NLZC_df = (0x0C << 16) | OPC_MSA_2R,
+
+    /* 2RF instruction df(bit 16) = _w, _d */
+    OPC_MSA_FCLASS_df   = (0x00 << 16) | OPC_MSA_2RF,
+    OPC_MSA_FTRUNC_S_df = (0x02 << 16) | OPC_MSA_2RF,
+    OPC_MSA_FTRUNC_U_df = (0x04 << 16) | OPC_MSA_2RF,
+    OPC_MSA_FSQRT_df    = (0x06 << 16) | OPC_MSA_2RF,
+    OPC_MSA_FRSQRT_df   = (0x08 << 16) | OPC_MSA_2RF,
+    OPC_MSA_FRCP_df     = (0x0A << 16) | OPC_MSA_2RF,
+    OPC_MSA_FRINT_df    = (0x0C << 16) | OPC_MSA_2RF,
+    OPC_MSA_FRLOG2_df   = (0x0E << 16) | OPC_MSA_2RF,
+    OPC_MSA_FEXUPL_df   = (0x10 << 16) | OPC_MSA_2RF,
+    OPC_MSA_FEXUPR_df   = (0x12 << 16) | OPC_MSA_2RF,
+    OPC_MSA_FFQL_df     = (0x14 << 16) | OPC_MSA_2RF,
+    OPC_MSA_FFQR_df     = (0x16 << 16) | OPC_MSA_2RF,
+    OPC_MSA_FINT_S_df   = (0x18 << 16) | OPC_MSA_2RF,
+    OPC_MSA_FINT_U_df   = (0x1A << 16) | OPC_MSA_2RF,
+    OPC_MSA_FFINT_S_df  = (0x1C << 16) | OPC_MSA_2RF,
+    OPC_MSA_FFINT_U_df  = (0x1E << 16) | OPC_MSA_2RF,
+
+    /* 3R instruction df(bits 22..21) = _b, _h, _w, d */
+    OPC_MSA_SLL_df      = (0x0 << 23) | OPC_MSA_3R_0D,
+    OPC_MSA_ADDV_df     = (0x0 << 23) | OPC_MSA_3R_0E,
+    OPC_MSA_CEQ_df      = (0x0 << 23) | OPC_MSA_3R_0F,
+    OPC_MSA_ADD_A_df    = (0x0 << 23) | OPC_MSA_3R_10,
+    OPC_MSA_SUBS_S_df   = (0x0 << 23) | OPC_MSA_3R_11,
+    OPC_MSA_MULV_df     = (0x0 << 23) | OPC_MSA_3R_12,
+    OPC_MSA_DOTP_S_df   = (0x0 << 23) | OPC_MSA_3R_13,
+    OPC_MSA_SLD_df      = (0x0 << 23) | OPC_MSA_3R_14,
+    OPC_MSA_VSHF_df     = (0x0 << 23) | OPC_MSA_3R_15,
+    OPC_MSA_SRA_df      = (0x1 << 23) | OPC_MSA_3R_0D,
+    OPC_MSA_SUBV_df     = (0x1 << 23) | OPC_MSA_3R_0E,
+    OPC_MSA_ADDS_A_df   = (0x1 << 23) | OPC_MSA_3R_10,
+    OPC_MSA_SUBS_U_df   = (0x1 << 23) | OPC_MSA_3R_11,
+    OPC_MSA_MADDV_df    = (0x1 << 23) | OPC_MSA_3R_12,
+    OPC_MSA_DOTP_U_df   = (0x1 << 23) | OPC_MSA_3R_13,
+    OPC_MSA_SPLAT_df    = (0x1 << 23) | OPC_MSA_3R_14,
+    OPC_MSA_SRAR_df     = (0x1 << 23) | OPC_MSA_3R_15,
+    OPC_MSA_SRL_df      = (0x2 << 23) | OPC_MSA_3R_0D,
+    OPC_MSA_MAX_S_df    = (0x2 << 23) | OPC_MSA_3R_0E,
+    OPC_MSA_CLT_S_df    = (0x2 << 23) | OPC_MSA_3R_0F,
+    OPC_MSA_ADDS_S_df   = (0x2 << 23) | OPC_MSA_3R_10,
+    OPC_MSA_SUBSUS_U_df = (0x2 << 23) | OPC_MSA_3R_11,
+    OPC_MSA_MSUBV_df    = (0x2 << 23) | OPC_MSA_3R_12,
+    OPC_MSA_DPADD_S_df  = (0x2 << 23) | OPC_MSA_3R_13,
+    OPC_MSA_PCKEV_df    = (0x2 << 23) | OPC_MSA_3R_14,
+    OPC_MSA_SRLR_df     = (0x2 << 23) | OPC_MSA_3R_15,
+    OPC_MSA_BCLR_df     = (0x3 << 23) | OPC_MSA_3R_0D,
+    OPC_MSA_MAX_U_df    = (0x3 << 23) | OPC_MSA_3R_0E,
+    OPC_MSA_CLT_U_df    = (0x3 << 23) | OPC_MSA_3R_0F,
+    OPC_MSA_ADDS_U_df   = (0x3 << 23) | OPC_MSA_3R_10,
+    OPC_MSA_SUBSUU_S_df = (0x3 << 23) | OPC_MSA_3R_11,
+    OPC_MSA_DPADD_U_df  = (0x3 << 23) | OPC_MSA_3R_13,
+    OPC_MSA_PCKOD_df    = (0x3 << 23) | OPC_MSA_3R_14,
+    OPC_MSA_BSET_df     = (0x4 << 23) | OPC_MSA_3R_0D,
+    OPC_MSA_MIN_S_df    = (0x4 << 23) | OPC_MSA_3R_0E,
+    OPC_MSA_CLE_S_df    = (0x4 << 23) | OPC_MSA_3R_0F,
+    OPC_MSA_AVE_S_df    = (0x4 << 23) | OPC_MSA_3R_10,
+    OPC_MSA_ASUB_S_df   = (0x4 << 23) | OPC_MSA_3R_11,
+    OPC_MSA_DIV_S_df    = (0x4 << 23) | OPC_MSA_3R_12,
+    OPC_MSA_DPSUB_S_df  = (0x4 << 23) | OPC_MSA_3R_13,
+    OPC_MSA_ILVL_df     = (0x4 << 23) | OPC_MSA_3R_14,
+    OPC_MSA_HADD_S_df   = (0x4 << 23) | OPC_MSA_3R_15,
+    OPC_MSA_BNEG_df     = (0x5 << 23) | OPC_MSA_3R_0D,
+    OPC_MSA_MIN_U_df    = (0x5 << 23) | OPC_MSA_3R_0E,
+    OPC_MSA_CLE_U_df    = (0x5 << 23) | OPC_MSA_3R_0F,
+    OPC_MSA_AVE_U_df    = (0x5 << 23) | OPC_MSA_3R_10,
+    OPC_MSA_ASUB_U_df   = (0x5 << 23) | OPC_MSA_3R_11,
+    OPC_MSA_DIV_U_df    = (0x5 << 23) | OPC_MSA_3R_12,
+    OPC_MSA_DPSUB_U_df  = (0x5 << 23) | OPC_MSA_3R_13,
+    OPC_MSA_ILVR_df     = (0x5 << 23) | OPC_MSA_3R_14,
+    OPC_MSA_HADD_U_df   = (0x5 << 23) | OPC_MSA_3R_15,
+    OPC_MSA_BINSL_df    = (0x6 << 23) | OPC_MSA_3R_0D,
+    OPC_MSA_MAX_A_df    = (0x6 << 23) | OPC_MSA_3R_0E,
+    OPC_MSA_AVER_S_df   = (0x6 << 23) | OPC_MSA_3R_10,
+    OPC_MSA_MOD_S_df    = (0x6 << 23) | OPC_MSA_3R_12,
+    OPC_MSA_ILVEV_df    = (0x6 << 23) | OPC_MSA_3R_14,
+    OPC_MSA_HSUB_S_df   = (0x6 << 23) | OPC_MSA_3R_15,
+    OPC_MSA_BINSR_df    = (0x7 << 23) | OPC_MSA_3R_0D,
+    OPC_MSA_MIN_A_df    = (0x7 << 23) | OPC_MSA_3R_0E,
+    OPC_MSA_AVER_U_df   = (0x7 << 23) | OPC_MSA_3R_10,
+    OPC_MSA_MOD_U_df    = (0x7 << 23) | OPC_MSA_3R_12,
+    OPC_MSA_ILVOD_df    = (0x7 << 23) | OPC_MSA_3R_14,
+    OPC_MSA_HSUB_U_df   = (0x7 << 23) | OPC_MSA_3R_15,
+
+    /* ELM instructions df(bits 21..16) = _b, _h, _w, _d */
+    OPC_MSA_SLDI_df     = (0x0 << 22) | (0x00 << 16) | OPC_MSA_ELM,
+    OPC_MSA_CTCMSA      = (0x0 << 22) | (0x3E << 16) | OPC_MSA_ELM,
+    OPC_MSA_SPLATI_df   = (0x1 << 22) | (0x00 << 16) | OPC_MSA_ELM,
+    OPC_MSA_CFCMSA      = (0x1 << 22) | (0x3E << 16) | OPC_MSA_ELM,
+    OPC_MSA_COPY_S_df   = (0x2 << 22) | (0x00 << 16) | OPC_MSA_ELM,
+    OPC_MSA_MOVE_V      = (0x2 << 22) | (0x3E << 16) | OPC_MSA_ELM,
+    OPC_MSA_COPY_U_df   = (0x3 << 22) | (0x00 << 16) | OPC_MSA_ELM,
+    OPC_MSA_INSERT_df   = (0x4 << 22) | (0x00 << 16) | OPC_MSA_ELM,
+    OPC_MSA_INSVE_df    = (0x5 << 22) | (0x00 << 16) | OPC_MSA_ELM,
+
+    /* 3RF instruction _df(bit 21) = _w, _d */
+    OPC_MSA_FCAF_df     = (0x0 << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FADD_df     = (0x0 << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_FCUN_df     = (0x1 << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FSUB_df     = (0x1 << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_FCOR_df     = (0x1 << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FCEQ_df     = (0x2 << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FMUL_df     = (0x2 << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_FCUNE_df    = (0x2 << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FCUEQ_df    = (0x3 << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FDIV_df     = (0x3 << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_FCNE_df     = (0x3 << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FCLT_df     = (0x4 << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FMADD_df    = (0x4 << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_MUL_Q_df    = (0x4 << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FCULT_df    = (0x5 << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FMSUB_df    = (0x5 << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_MADD_Q_df   = (0x5 << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FCLE_df     = (0x6 << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_MSUB_Q_df   = (0x6 << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FCULE_df    = (0x7 << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FEXP2_df    = (0x7 << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_FSAF_df     = (0x8 << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FEXDO_df    = (0x8 << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_FSUN_df     = (0x9 << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FSOR_df     = (0x9 << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FSEQ_df     = (0xA << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FTQ_df      = (0xA << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_FSUNE_df    = (0xA << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FSUEQ_df    = (0xB << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FSNE_df     = (0xB << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FSLT_df     = (0xC << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FMIN_df     = (0xC << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_MULR_Q_df   = (0xC << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FSULT_df    = (0xD << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FMIN_A_df   = (0xD << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_MADDR_Q_df  = (0xD << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FSLE_df     = (0xE << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FMAX_df     = (0xE << 22) | OPC_MSA_3RF_1B,
+    OPC_MSA_MSUBR_Q_df  = (0xE << 22) | OPC_MSA_3RF_1C,
+    OPC_MSA_FSULE_df    = (0xF << 22) | OPC_MSA_3RF_1A,
+    OPC_MSA_FMAX_A_df   = (0xF << 22) | OPC_MSA_3RF_1B,
+
+    /* BIT instruction df(bits 22..16) = _B _H _W _D */
+    OPC_MSA_SLLI_df     = (0x0 << 23) | OPC_MSA_BIT_09,
+    OPC_MSA_SAT_S_df    = (0x0 << 23) | OPC_MSA_BIT_0A,
+    OPC_MSA_SRAI_df     = (0x1 << 23) | OPC_MSA_BIT_09,
+    OPC_MSA_SAT_U_df    = (0x1 << 23) | OPC_MSA_BIT_0A,
+    OPC_MSA_SRLI_df     = (0x2 << 23) | OPC_MSA_BIT_09,
+    OPC_MSA_SRARI_df    = (0x2 << 23) | OPC_MSA_BIT_0A,
+    OPC_MSA_BCLRI_df    = (0x3 << 23) | OPC_MSA_BIT_09,
+    OPC_MSA_SRLRI_df    = (0x3 << 23) | OPC_MSA_BIT_0A,
+    OPC_MSA_BSETI_df    = (0x4 << 23) | OPC_MSA_BIT_09,
+    OPC_MSA_BNEGI_df    = (0x5 << 23) | OPC_MSA_BIT_09,
+    OPC_MSA_BINSLI_df   = (0x6 << 23) | OPC_MSA_BIT_09,
+    OPC_MSA_BINSRI_df   = (0x7 << 23) | OPC_MSA_BIT_09,
+};
+
 /* global register indices */
 static TCGv_ptr cpu_env;
 static TCGv cpu_gpr[32], cpu_PC;
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 06/20] target-mips: add MSA opcode enum
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 06/20] target-mips: add MSA opcode enum Yongbok Kim
@ 2014-10-10  9:26   ` Leon Alrae
  2014-10-22 12:18   ` James Hogan
  1 sibling, 0 replies; 35+ messages in thread
From: Leon Alrae @ 2014-10-10  9:26 UTC (permalink / raw)
  To: Yongbok Kim, qemu-devel; +Cc: cristian.cuna, aurelien

On 14/07/2014 10:55, Yongbok Kim wrote:
> add MSA opcode enum
> 
> Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
> ---
>  target-mips/translate.c |  248 +++++++++++++++++++++++++++++++++++++++++++++++
>  1 files changed, 248 insertions(+), 0 deletions(-)
> 
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index cccbc44..6b4a82c 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -108,6 +108,8 @@ enum {
>      OPC_SDC2     = (0x3E << 26),
>      /* MDMX ASE specific */
>      OPC_MDMX     = (0x1E << 26),
> +    /* MSA ASE, same as MDMX */
> +    OPC_MSA      = OPC_MDMX,
>      /* Cache and prefetch */
>      OPC_CACHE    = (0x2F << 26),
>      OPC_PREF     = (0x33 << 26),
> @@ -128,10 +130,12 @@ enum {
>      OPC_ROTR     = OPC_SRL | (1 << 21),
>      OPC_SRA      = 0x03 | OPC_SPECIAL,
>      OPC_SLLV     = 0x04 | OPC_SPECIAL,
> +    OPC_MSA_S05  = 0x05 | OPC_SPECIAL,
>      OPC_SRLV     = 0x06 | OPC_SPECIAL, /* also ROTRV */
>      OPC_ROTRV    = OPC_SRLV | (1 << 6),
>      OPC_SRAV     = 0x07 | OPC_SPECIAL,
>      OPC_DSLLV    = 0x14 | OPC_SPECIAL,
> +    OPC_MSA_S15  = 0x15 | OPC_SPECIAL,

OPC_MSA_S05 and OPC_MSA_S15 seem to be unused.

>      OPC_DSRLV    = 0x16 | OPC_SPECIAL, /* also DROTRV */
>      OPC_DROTRV   = OPC_DSRLV | (1 << 6),
>      OPC_DSRAV    = 0x17 | OPC_SPECIAL,
> @@ -835,6 +839,8 @@ enum {
>      OPC_BC1      = (0x08 << 21) | OPC_CP1, /* bc */
>      OPC_BC1ANY2  = (0x09 << 21) | OPC_CP1,
>      OPC_BC1ANY4  = (0x0A << 21) | OPC_CP1,
> +    OPC_MSA_BZ_V = (0x0B << 21) | OPC_CP1,
> +    OPC_MSA_BNZ_V = (0x0F << 21) | OPC_CP1,
>      OPC_S_FMT    = (FMT_S << 21) | OPC_CP1,
>      OPC_D_FMT    = (FMT_D << 21) | OPC_CP1,
>      OPC_E_FMT    = (FMT_E << 21) | OPC_CP1,
> @@ -842,6 +848,14 @@ enum {
>      OPC_W_FMT    = (FMT_W << 21) | OPC_CP1,
>      OPC_L_FMT    = (FMT_L << 21) | OPC_CP1,
>      OPC_PS_FMT   = (FMT_PS << 21) | OPC_CP1,
> +    OPC_MSA_BZ_B = (0x18 << 21) | OPC_CP1,
> +    OPC_MSA_BZ_H = (0x19 << 21) | OPC_CP1,
> +    OPC_MSA_BZ_W = (0x1A << 21) | OPC_CP1,
> +    OPC_MSA_BZ_D = (0x1B << 21) | OPC_CP1,
> +    OPC_MSA_BNZ_B = (0x1C << 21) | OPC_CP1,
> +    OPC_MSA_BNZ_H = (0x1D << 21) | OPC_CP1,
> +    OPC_MSA_BNZ_W = (0x1E << 21) | OPC_CP1,
> +    OPC_MSA_BNZ_D  = (0x1F << 21) | OPC_CP1,
Extra whitespace    ^

>  };
>  
>  #define MASK_CP1_FUNC(op)       MASK_CP1(op) | (op & 0x3F)
> @@ -1000,6 +1014,240 @@ enum {
>      OPC_NMSUB_PS= 0x3E | OPC_CP3,
>  };
>  
> +/* MSA Opcodes */
> +
> +#define MASK_MSA_MINOR(op)    (MASK_OP_MAJOR(op) | (op & 0x3F))
> +enum {
> +    OPC_MSA_I8_00   = 0x00 | OPC_MSA,
> +    OPC_MSA_I8_01   = 0x01 | OPC_MSA,
> +    OPC_MSA_I8_02   = 0x02 | OPC_MSA,
> +    OPC_MSA_I5_06   = 0x06 | OPC_MSA,
> +    OPC_MSA_I5_07   = 0x07 | OPC_MSA,
> +    OPC_MSA_BIT_09  = 0x09 | OPC_MSA,
> +    OPC_MSA_BIT_0A  = 0x0A | OPC_MSA,
> +    OPC_MSA_3R_0D   = 0x0D | OPC_MSA,
> +    OPC_MSA_3R_0E   = 0x0E | OPC_MSA,
> +    OPC_MSA_3R_0F   = 0x0F | OPC_MSA,
> +    OPC_MSA_3R_10   = 0x10 | OPC_MSA,
> +    OPC_MSA_3R_11   = 0x11 | OPC_MSA,
> +    OPC_MSA_3R_12   = 0x12 | OPC_MSA,
> +    OPC_MSA_3R_13   = 0x13 | OPC_MSA,
> +    OPC_MSA_3R_14   = 0x14 | OPC_MSA,
> +    OPC_MSA_3R_15   = 0x15 | OPC_MSA,
> +    OPC_MSA_ELM     = 0x19 | OPC_MSA,
> +    OPC_MSA_3RF_1A  = 0x1A | OPC_MSA,
> +    OPC_MSA_3RF_1B  = 0x1B | OPC_MSA,
> +    OPC_MSA_3RF_1C  = 0x1C | OPC_MSA,
> +    OPC_MSA_VEC     = 0x1E | OPC_MSA,
> +
> +    /* MI10 instruction */
> +    OPC_MSA_LD_B    = (0x20) | OPC_MSA,
> +    OPC_MSA_LD_H    = (0x21) | OPC_MSA,
> +    OPC_MSA_LD_W    = (0x22) | OPC_MSA,
> +    OPC_MSA_LD_D    = (0x23) | OPC_MSA,
> +    OPC_MSA_ST_B    = (0x24) | OPC_MSA,
> +    OPC_MSA_ST_H    = (0x25) | OPC_MSA,
> +    OPC_MSA_ST_W    = (0x26) | OPC_MSA,
> +    OPC_MSA_ST_D    = (0x27) | OPC_MSA,
> +};
> +
> +enum {
> +    /* I5 instruction df(bits 22..21) = _b, _h, _w, _d */
> +    OPC_MSA_ADDVI_df    = (0x0 << 23) | OPC_MSA_I5_06,
> +    OPC_MSA_CEQI_df     = (0x0 << 23) | OPC_MSA_I5_07,
> +    OPC_MSA_SUBVI_df    = (0x1 << 23) | OPC_MSA_I5_06,
> +    OPC_MSA_MAXI_S_df   = (0x2 << 23) | OPC_MSA_I5_06,
> +    OPC_MSA_CLTI_S_df   = (0x2 << 23) | OPC_MSA_I5_07,
> +    OPC_MSA_MAXI_U_df   = (0x3 << 23) | OPC_MSA_I5_06,
> +    OPC_MSA_CLTI_U_df   = (0x3 << 23) | OPC_MSA_I5_07,
> +    OPC_MSA_MINI_S_df   = (0x4 << 23) | OPC_MSA_I5_06,
> +    OPC_MSA_CLEI_S_df   = (0x4 << 23) | OPC_MSA_I5_07,
> +    OPC_MSA_MINI_U_df   = (0x5 << 23) | OPC_MSA_I5_06,
> +    OPC_MSA_CLEI_U_df   = (0x5 << 23) | OPC_MSA_I5_07,
> +    OPC_MSA_LDI_df      = (0x6 << 23) | OPC_MSA_I5_07,
> +
> +    /* I8 instruction */
> +    OPC_MSA_ANDI_B  = (0x0 << 24) | OPC_MSA_I8_00,
> +    OPC_MSA_BMNZI_B = (0x0 << 24) | OPC_MSA_I8_01,
> +    OPC_MSA_SHF_B   = (0x0 << 24) | OPC_MSA_I8_02,
> +    OPC_MSA_ORI_B   = (0x1 << 24) | OPC_MSA_I8_00,
> +    OPC_MSA_BMZI_B  = (0x1 << 24) | OPC_MSA_I8_01,
> +    OPC_MSA_SHF_H   = (0x1 << 24) | OPC_MSA_I8_02,
> +    OPC_MSA_NORI_B  = (0x2 << 24) | OPC_MSA_I8_00,
> +    OPC_MSA_BSELI_B = (0x2 << 24) | OPC_MSA_I8_01,
> +    OPC_MSA_SHF_W   = (0x2 << 24) | OPC_MSA_I8_02,
> +    OPC_MSA_XORI_B  = (0x3 << 24) | OPC_MSA_I8_00,
> +
> +    /* VEC/2R/2RF instruction */
> +    OPC_MSA_AND_V   = (0x00 << 21) | OPC_MSA_VEC,
> +    OPC_MSA_OR_V    = (0x01 << 21) | OPC_MSA_VEC,
> +    OPC_MSA_NOR_V   = (0x02 << 21) | OPC_MSA_VEC,
> +    OPC_MSA_XOR_V   = (0x03 << 21) | OPC_MSA_VEC,
> +    OPC_MSA_BMNZ_V  = (0x04 << 21) | OPC_MSA_VEC,
> +    OPC_MSA_BMZ_V   = (0x05 << 21) | OPC_MSA_VEC,
> +    OPC_MSA_BSEL_V  = (0x06 << 21) | OPC_MSA_VEC,
> +
> +    OPC_MSA_2R      = (0x18 << 21) | OPC_MSA_VEC,
> +    OPC_MSA_2RF     = (0x19 << 21) | OPC_MSA_VEC,
> +
> +    /* 2R instruction df(bits 17..16) = _b, _h, _w, _d */
> +    OPC_MSA_FILL_df = (0x00 << 16) | OPC_MSA_2R,
> +    OPC_MSA_PCNT_df = (0x04 << 16) | OPC_MSA_2R,
> +    OPC_MSA_NLOC_df = (0x08 << 16) | OPC_MSA_2R,
> +    OPC_MSA_NLZC_df = (0x0C << 16) | OPC_MSA_2R,
> +
> +    /* 2RF instruction df(bit 16) = _w, _d */
> +    OPC_MSA_FCLASS_df   = (0x00 << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FTRUNC_S_df = (0x02 << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FTRUNC_U_df = (0x04 << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FSQRT_df    = (0x06 << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FRSQRT_df   = (0x08 << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FRCP_df     = (0x0A << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FRINT_df    = (0x0C << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FRLOG2_df   = (0x0E << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FEXUPL_df   = (0x10 << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FEXUPR_df   = (0x12 << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FFQL_df     = (0x14 << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FFQR_df     = (0x16 << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FINT_S_df   = (0x18 << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FINT_U_df   = (0x1A << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FFINT_S_df  = (0x1C << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FFINT_U_df  = (0x1E << 16) | OPC_MSA_2RF,
> +
> +    /* 3R instruction df(bits 22..21) = _b, _h, _w, d */
> +    OPC_MSA_SLL_df      = (0x0 << 23) | OPC_MSA_3R_0D,
> +    OPC_MSA_ADDV_df     = (0x0 << 23) | OPC_MSA_3R_0E,
> +    OPC_MSA_CEQ_df      = (0x0 << 23) | OPC_MSA_3R_0F,
> +    OPC_MSA_ADD_A_df    = (0x0 << 23) | OPC_MSA_3R_10,
> +    OPC_MSA_SUBS_S_df   = (0x0 << 23) | OPC_MSA_3R_11,
> +    OPC_MSA_MULV_df     = (0x0 << 23) | OPC_MSA_3R_12,
> +    OPC_MSA_DOTP_S_df   = (0x0 << 23) | OPC_MSA_3R_13,
> +    OPC_MSA_SLD_df      = (0x0 << 23) | OPC_MSA_3R_14,
> +    OPC_MSA_VSHF_df     = (0x0 << 23) | OPC_MSA_3R_15,
> +    OPC_MSA_SRA_df      = (0x1 << 23) | OPC_MSA_3R_0D,
> +    OPC_MSA_SUBV_df     = (0x1 << 23) | OPC_MSA_3R_0E,
> +    OPC_MSA_ADDS_A_df   = (0x1 << 23) | OPC_MSA_3R_10,
> +    OPC_MSA_SUBS_U_df   = (0x1 << 23) | OPC_MSA_3R_11,
> +    OPC_MSA_MADDV_df    = (0x1 << 23) | OPC_MSA_3R_12,
> +    OPC_MSA_DOTP_U_df   = (0x1 << 23) | OPC_MSA_3R_13,
> +    OPC_MSA_SPLAT_df    = (0x1 << 23) | OPC_MSA_3R_14,
> +    OPC_MSA_SRAR_df     = (0x1 << 23) | OPC_MSA_3R_15,
> +    OPC_MSA_SRL_df      = (0x2 << 23) | OPC_MSA_3R_0D,
> +    OPC_MSA_MAX_S_df    = (0x2 << 23) | OPC_MSA_3R_0E,
> +    OPC_MSA_CLT_S_df    = (0x2 << 23) | OPC_MSA_3R_0F,
> +    OPC_MSA_ADDS_S_df   = (0x2 << 23) | OPC_MSA_3R_10,
> +    OPC_MSA_SUBSUS_U_df = (0x2 << 23) | OPC_MSA_3R_11,
> +    OPC_MSA_MSUBV_df    = (0x2 << 23) | OPC_MSA_3R_12,
> +    OPC_MSA_DPADD_S_df  = (0x2 << 23) | OPC_MSA_3R_13,
> +    OPC_MSA_PCKEV_df    = (0x2 << 23) | OPC_MSA_3R_14,
> +    OPC_MSA_SRLR_df     = (0x2 << 23) | OPC_MSA_3R_15,
> +    OPC_MSA_BCLR_df     = (0x3 << 23) | OPC_MSA_3R_0D,
> +    OPC_MSA_MAX_U_df    = (0x3 << 23) | OPC_MSA_3R_0E,
> +    OPC_MSA_CLT_U_df    = (0x3 << 23) | OPC_MSA_3R_0F,
> +    OPC_MSA_ADDS_U_df   = (0x3 << 23) | OPC_MSA_3R_10,
> +    OPC_MSA_SUBSUU_S_df = (0x3 << 23) | OPC_MSA_3R_11,
> +    OPC_MSA_DPADD_U_df  = (0x3 << 23) | OPC_MSA_3R_13,
> +    OPC_MSA_PCKOD_df    = (0x3 << 23) | OPC_MSA_3R_14,
> +    OPC_MSA_BSET_df     = (0x4 << 23) | OPC_MSA_3R_0D,
> +    OPC_MSA_MIN_S_df    = (0x4 << 23) | OPC_MSA_3R_0E,
> +    OPC_MSA_CLE_S_df    = (0x4 << 23) | OPC_MSA_3R_0F,
> +    OPC_MSA_AVE_S_df    = (0x4 << 23) | OPC_MSA_3R_10,
> +    OPC_MSA_ASUB_S_df   = (0x4 << 23) | OPC_MSA_3R_11,
> +    OPC_MSA_DIV_S_df    = (0x4 << 23) | OPC_MSA_3R_12,
> +    OPC_MSA_DPSUB_S_df  = (0x4 << 23) | OPC_MSA_3R_13,
> +    OPC_MSA_ILVL_df     = (0x4 << 23) | OPC_MSA_3R_14,
> +    OPC_MSA_HADD_S_df   = (0x4 << 23) | OPC_MSA_3R_15,
> +    OPC_MSA_BNEG_df     = (0x5 << 23) | OPC_MSA_3R_0D,
> +    OPC_MSA_MIN_U_df    = (0x5 << 23) | OPC_MSA_3R_0E,
> +    OPC_MSA_CLE_U_df    = (0x5 << 23) | OPC_MSA_3R_0F,
> +    OPC_MSA_AVE_U_df    = (0x5 << 23) | OPC_MSA_3R_10,
> +    OPC_MSA_ASUB_U_df   = (0x5 << 23) | OPC_MSA_3R_11,
> +    OPC_MSA_DIV_U_df    = (0x5 << 23) | OPC_MSA_3R_12,
> +    OPC_MSA_DPSUB_U_df  = (0x5 << 23) | OPC_MSA_3R_13,
> +    OPC_MSA_ILVR_df     = (0x5 << 23) | OPC_MSA_3R_14,
> +    OPC_MSA_HADD_U_df   = (0x5 << 23) | OPC_MSA_3R_15,
> +    OPC_MSA_BINSL_df    = (0x6 << 23) | OPC_MSA_3R_0D,
> +    OPC_MSA_MAX_A_df    = (0x6 << 23) | OPC_MSA_3R_0E,
> +    OPC_MSA_AVER_S_df   = (0x6 << 23) | OPC_MSA_3R_10,
> +    OPC_MSA_MOD_S_df    = (0x6 << 23) | OPC_MSA_3R_12,
> +    OPC_MSA_ILVEV_df    = (0x6 << 23) | OPC_MSA_3R_14,
> +    OPC_MSA_HSUB_S_df   = (0x6 << 23) | OPC_MSA_3R_15,
> +    OPC_MSA_BINSR_df    = (0x7 << 23) | OPC_MSA_3R_0D,
> +    OPC_MSA_MIN_A_df    = (0x7 << 23) | OPC_MSA_3R_0E,
> +    OPC_MSA_AVER_U_df   = (0x7 << 23) | OPC_MSA_3R_10,
> +    OPC_MSA_MOD_U_df    = (0x7 << 23) | OPC_MSA_3R_12,
> +    OPC_MSA_ILVOD_df    = (0x7 << 23) | OPC_MSA_3R_14,
> +    OPC_MSA_HSUB_U_df   = (0x7 << 23) | OPC_MSA_3R_15,
> +
> +    /* ELM instructions df(bits 21..16) = _b, _h, _w, _d */
> +    OPC_MSA_SLDI_df     = (0x0 << 22) | (0x00 << 16) | OPC_MSA_ELM,
> +    OPC_MSA_CTCMSA      = (0x0 << 22) | (0x3E << 16) | OPC_MSA_ELM,
> +    OPC_MSA_SPLATI_df   = (0x1 << 22) | (0x00 << 16) | OPC_MSA_ELM,
> +    OPC_MSA_CFCMSA      = (0x1 << 22) | (0x3E << 16) | OPC_MSA_ELM,
> +    OPC_MSA_COPY_S_df   = (0x2 << 22) | (0x00 << 16) | OPC_MSA_ELM,
> +    OPC_MSA_MOVE_V      = (0x2 << 22) | (0x3E << 16) | OPC_MSA_ELM,
> +    OPC_MSA_COPY_U_df   = (0x3 << 22) | (0x00 << 16) | OPC_MSA_ELM,
> +    OPC_MSA_INSERT_df   = (0x4 << 22) | (0x00 << 16) | OPC_MSA_ELM,
> +    OPC_MSA_INSVE_df    = (0x5 << 22) | (0x00 << 16) | OPC_MSA_ELM,
> +
> +    /* 3RF instruction _df(bit 21) = _w, _d */
> +    OPC_MSA_FCAF_df     = (0x0 << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FADD_df     = (0x0 << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_FCUN_df     = (0x1 << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FSUB_df     = (0x1 << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_FCOR_df     = (0x1 << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FCEQ_df     = (0x2 << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FMUL_df     = (0x2 << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_FCUNE_df    = (0x2 << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FCUEQ_df    = (0x3 << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FDIV_df     = (0x3 << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_FCNE_df     = (0x3 << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FCLT_df     = (0x4 << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FMADD_df    = (0x4 << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_MUL_Q_df    = (0x4 << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FCULT_df    = (0x5 << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FMSUB_df    = (0x5 << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_MADD_Q_df   = (0x5 << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FCLE_df     = (0x6 << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_MSUB_Q_df   = (0x6 << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FCULE_df    = (0x7 << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FEXP2_df    = (0x7 << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_FSAF_df     = (0x8 << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FEXDO_df    = (0x8 << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_FSUN_df     = (0x9 << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FSOR_df     = (0x9 << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FSEQ_df     = (0xA << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FTQ_df      = (0xA << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_FSUNE_df    = (0xA << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FSUEQ_df    = (0xB << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FSNE_df     = (0xB << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FSLT_df     = (0xC << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FMIN_df     = (0xC << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_MULR_Q_df   = (0xC << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FSULT_df    = (0xD << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FMIN_A_df   = (0xD << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_MADDR_Q_df  = (0xD << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FSLE_df     = (0xE << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FMAX_df     = (0xE << 22) | OPC_MSA_3RF_1B,
> +    OPC_MSA_MSUBR_Q_df  = (0xE << 22) | OPC_MSA_3RF_1C,
> +    OPC_MSA_FSULE_df    = (0xF << 22) | OPC_MSA_3RF_1A,
> +    OPC_MSA_FMAX_A_df   = (0xF << 22) | OPC_MSA_3RF_1B,
> +
> +    /* BIT instruction df(bits 22..16) = _B _H _W _D */
> +    OPC_MSA_SLLI_df     = (0x0 << 23) | OPC_MSA_BIT_09,
> +    OPC_MSA_SAT_S_df    = (0x0 << 23) | OPC_MSA_BIT_0A,
> +    OPC_MSA_SRAI_df     = (0x1 << 23) | OPC_MSA_BIT_09,
> +    OPC_MSA_SAT_U_df    = (0x1 << 23) | OPC_MSA_BIT_0A,
> +    OPC_MSA_SRLI_df     = (0x2 << 23) | OPC_MSA_BIT_09,
> +    OPC_MSA_SRARI_df    = (0x2 << 23) | OPC_MSA_BIT_0A,
> +    OPC_MSA_BCLRI_df    = (0x3 << 23) | OPC_MSA_BIT_09,
> +    OPC_MSA_SRLRI_df    = (0x3 << 23) | OPC_MSA_BIT_0A,
> +    OPC_MSA_BSETI_df    = (0x4 << 23) | OPC_MSA_BIT_09,
> +    OPC_MSA_BNEGI_df    = (0x5 << 23) | OPC_MSA_BIT_09,
> +    OPC_MSA_BINSLI_df   = (0x6 << 23) | OPC_MSA_BIT_09,
> +    OPC_MSA_BINSRI_df   = (0x7 << 23) | OPC_MSA_BIT_09,
> +};
> +
>  /* global register indices */
>  static TCGv_ptr cpu_env;
>  static TCGv cpu_gpr[32], cpu_PC;
> 

Otherwise it looks good to me:

Reviewed-by: Leon Alrae <leon.alrae@imgtec.com>

Leon

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 06/20] target-mips: add MSA opcode enum
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 06/20] target-mips: add MSA opcode enum Yongbok Kim
  2014-10-10  9:26   ` Leon Alrae
@ 2014-10-22 12:18   ` James Hogan
  1 sibling, 0 replies; 35+ messages in thread
From: James Hogan @ 2014-10-22 12:18 UTC (permalink / raw)
  To: Yongbok Kim, qemu-devel; +Cc: cristian.cuna, leon.alrae, aurelien

Hi,

On 14/07/14 10:55, Yongbok Kim wrote:
> @@ -835,6 +839,8 @@ enum {
>      OPC_BC1      = (0x08 << 21) | OPC_CP1, /* bc */
>      OPC_BC1ANY2  = (0x09 << 21) | OPC_CP1,
>      OPC_BC1ANY4  = (0x0A << 21) | OPC_CP1,
> +    OPC_MSA_BZ_V = (0x0B << 21) | OPC_CP1,
> +    OPC_MSA_BNZ_V = (0x0F << 21) | OPC_CP1,

I don't think any of the existing secondary opcodes have the ASE
prefixed, and the instruction mnemonics should already be unique, so is
it worth dropping the MSA_ on these and the other secondary opcodes?

> +    /* 2R instruction df(bits 17..16) = _b, _h, _w, _d */
> +    OPC_MSA_FILL_df = (0x00 << 16) | OPC_MSA_2R,
> +    OPC_MSA_PCNT_df = (0x04 << 16) | OPC_MSA_2R,
> +    OPC_MSA_NLOC_df = (0x08 << 16) | OPC_MSA_2R,
> +    OPC_MSA_NLZC_df = (0x0C << 16) | OPC_MSA_2R,

it might be more consistent (and more directly comparable to the
encoding table in the manual) to use a shift of 18 here, kind of like
you did for the I5 instructions.

Same for the 2RF ones below but with a shift of 17.

> +
> +    /* 2RF instruction df(bit 16) = _w, _d */

> +    OPC_MSA_FINT_S_df   = (0x18 << 16) | OPC_MSA_2RF,
> +    OPC_MSA_FINT_U_df   = (0x1A << 16) | OPC_MSA_2RF,

the manual calls these two FTINT rather than FINT

Otherwise
Reviewed-by: James Hogan <james.hogan@imgtec.com>

Cheers
James

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 07/20] target-mips: add msa_reset(), global msa register
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (5 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 06/20] target-mips: add MSA opcode enum Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-10-22 13:21   ` James Hogan
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 08/20] target-mips: add msa_helper.c Yongbok Kim
                   ` (12 subsequent siblings)
  19 siblings, 1 reply; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add msa_reset() and global msa register (d type only)

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/translate.c      |   74 ++++++++++++++++++++++++++++++++++++++++++
 target-mips/translate_init.c |   45 +++++++++++++++++++++++++
 2 files changed, 119 insertions(+), 0 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index 6b4a82c..b8dbbdc 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -1256,6 +1256,7 @@ static TCGv cpu_dspctrl, btarget, bcond;
 static TCGv_i32 hflags;
 static TCGv_i32 fpu_fcr0, fpu_fcr31;
 static TCGv_i64 fpu_f64[32];
+static TCGv_i64 msa_wr_d[64];
 
 static uint32_t gen_opc_hflags[OPC_BUF_SIZE];
 static target_ulong gen_opc_btarget[OPC_BUF_SIZE];
@@ -1353,6 +1354,25 @@ static const char * const fregnames[] = {
     "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",
 };
 
+static const char * const msaregnames[] = { /* [2n]=wn.d0, [2n+1]=wn.d1 */
+    "w0.d0",  "w0.d1",  "w1.d0",  "w1.d1",
+    "w2.d0",  "w2.d1",  "w3.d0",  "w3.d1",
+    "w4.d0",  "w4.d1",  "w5.d0",  "w5.d1",
+    "w6.d0",  "w6.d1",  "w7.d0",  "w7.d1",
+    "w8.d0",  "w8.d1",  "w9.d0",  "w9.d1",
+    "w10.d0", "w10.d1", "w11.d0", "w11.d1",
+    "w12.d0", "w12.d1", "w13.d0", "w13.d1",
+    "w14.d0", "w14.d1", "w15.d0", "w15.d1",
+    "w16.d0", "w16.d1", "w17.d0", "w17.d1",
+    "w18.d0", "w18.d1", "w19.d0", "w19.d1",
+    "w20.d0", "w20.d1", "w21.d0", "w21.d1",
+    "w22.d0", "w22.d1", "w23.d0", "w23.d1",
+    "w24.d0", "w24.d1", "w25.d0", "w25.d1",
+    "w26.d0", "w26.d1", "w27.d0", "w27.d1",
+    "w28.d0", "w28.d1", "w29.d0", "w29.d1",
+    "w30.d0", "w30.d1", "w31.d0", "w31.d1",
+};
+
 #define MIPS_DEBUG(fmt, ...)                                                  \
     do {                                                                      \
         if (MIPS_DEBUG_DISAS) {                                               \
@@ -14627,6 +14647,47 @@ static void gen_mipsdsp_accinsn(DisasContext *ctx, uint32_t op1, uint32_t op2,
 
 /* End MIPSDSP functions. */
 
+/* MIPS SIMD Architecture (MSA)  */
+
+static inline int check_msa_access(CPUMIPSState *env, DisasContext *ctx,
+                                    int wt, int ws, int wd)
+{ /* returns 1 if MSA may be used; else raises an exception and returns 0 */
+    if (unlikely((ctx->hflags & MIPS_HFLAG_FPU) &&
+                 !(ctx->hflags & MIPS_HFLAG_F64))) {
+        generate_exception(ctx, EXCP_RI); /* FPU flag without F64 flag */
+        return 0;
+    }
+
+    if (unlikely(!(ctx->hflags & MIPS_HFLAG_MSA))) {
+        if (ctx->insn_flags & ASE_MSA) {
+            generate_exception(ctx, EXCP_MSADIS); /* ASE present, MSA off */
+            return 0;
+        } else {
+            generate_exception(ctx, EXCP_RI); /* no MSA ASE at all */
+            return 0;
+        }
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        uint32_t curr_request = 0; /* bit n set: register wn requested */
+        if (wd != -1) { /* -1 means the operand is not used */
+            curr_request |= (1U << wd); /* 1 << 31 would overflow int */
+        }
+        if (wt != -1) {
+            curr_request |= (1U << wt);
+        }
+        if (ws != -1) {
+            curr_request |= (1U << ws);
+        }
+        env->active_msa.msarequest = curr_request
+                & (~env->active_msa.msaaccess | env->active_msa.msasave);
+        if (unlikely(env->active_msa.msarequest != 0)) {
+            generate_exception(ctx, EXCP_MSADIS);
+            return 0;
+        }
+    }
+    return 1;
+}
 static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
 {
     int32_t offset;
@@ -16119,6 +16180,15 @@ void mips_tcg_init(void)
         fpu_f64[i] = tcg_global_mem_new_i64(TCG_AREG0, off, fregnames[i]);
     }
 
+    for (i = 0; i < 32; i++) {
+        int off = offsetof(CPUMIPSState, active_fpu.fpr[i].wr.d[0]);
+        msa_wr_d[i * 2] =
+                tcg_global_mem_new_i64(TCG_AREG0, off, msaregnames[i * 2]);
+        off = offsetof(CPUMIPSState, active_fpu.fpr[i].wr.d[1]);
+        msa_wr_d[i * 2 + 1] =
+                tcg_global_mem_new_i64(TCG_AREG0, off, msaregnames[i * 2 + 1]);
+    }
+
     cpu_PC = tcg_global_mem_new(TCG_AREG0,
                                 offsetof(CPUMIPSState, active_tc.PC), "PC");
     for (i = 0; i < MIPS_DSP_ACC; i++) {
@@ -16318,6 +16388,10 @@ void cpu_state_reset(CPUMIPSState *env)
         }
     }
 #endif
+    /* MSA */
+    if (env->CP0_Config3 & (1 << CP0C3_MSAP)) {
+        msa_reset(env);
+    }
     compute_hflags(env);
     cs->exception_index = EXCP_NONE;
 }
diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c
index 29dc2ef..9e0f67b 100644
--- a/target-mips/translate_init.c
+++ b/target-mips/translate_init.c
@@ -688,3 +688,48 @@ static void mvp_init (CPUMIPSState *env, const mips_def_t *def)
                              (0x0 << CP0MVPC1_PCX) | (0x0 << CP0MVPC1_PCP2) |
                              (0x1 << CP0MVPC1_PCP1);
 }
+
+static void msa_reset(CPUMIPSState *env)
+{ /* puts the MSA control registers and its float_status in reset state */
+#ifdef CONFIG_USER_ONLY
+    /* MSA access enabled */
+    env->CP0_Config5 |= 1 << CP0C5_MSAEn;
+
+    /* DSP and CP1 enabled, 64-bit FPRs */
+    env->CP0_Status |= (1 << CP0St_MX);
+    env->hflags |= MIPS_HFLAG_DSP; /* NOTE(review): why DSP for MSA? */
+
+    env->CP0_Status |= (1 << CP0St_CU1) | (1 << CP0St_FR);
+    env->hflags |= MIPS_HFLAG_F64 | MIPS_HFLAG_COP1X;
+#endif
+
+    /* Vector register partitioning not implemented */
+    env->active_msa.msair = 0;
+    env->active_msa.msaaccess  = 0xffffffff; /* TODO confirm reset value */
+    env->active_msa.msasave    = 0;
+    env->active_msa.msarequest = 0;
+
+    /* MSA CSR:
+       - non-signaling floating point exception mode off (NX bit is 0)
+       - Cause, Enables, and Flags are all 0
+       - round to nearest / ties to even (RM bits are 0) */
+    env->active_msa.msacsr = 0;
+
+    /* tininess detected after rounding */
+    set_float_detect_tininess(float_tininess_after_rounding,
+                              &env->active_msa.fp_status);
+
+    /* clear float_status exception flags */
+    set_float_exception_flags(0, &env->active_msa.fp_status);
+
+    /* set float_status rounding mode */
+    set_float_rounding_mode(float_round_nearest_even,
+                            &env->active_msa.fp_status);
+
+    /* set float_status flush modes */
+    set_flush_to_zero(0, &env->active_msa.fp_status);
+    set_flush_inputs_to_zero(0, &env->active_msa.fp_status);
+
+    /* clear float_status nan mode */
+    set_default_nan_mode(0, &env->active_msa.fp_status);
+}
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 07/20] target-mips: add msa_reset(), global msa register
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 07/20] target-mips: add msa_reset(), global msa register Yongbok Kim
@ 2014-10-22 13:21   ` James Hogan
  0 siblings, 0 replies; 35+ messages in thread
From: James Hogan @ 2014-10-22 13:21 UTC (permalink / raw)
  To: Yongbok Kim, qemu-devel; +Cc: cristian.cuna, leon.alrae, aurelien

Hi,

On 14/07/14 10:55, Yongbok Kim wrote:
> +static const char * const msaregnames[] = {
> +    "w0.d0",  "w0.d1",  "w1.d0",  "w1.d1",
> +    "w2.d0",  "w2.d1",  "w3.d0",  "w3.d1",
> +    "w4.d0",  "w4.d1",  "w4.d0",  "w4.d1",

I think those last 2 should be w5.d0 and w5.d1

> +static inline int check_msa_access(CPUMIPSState *env, DisasContext *ctx,
> +                                    int wt, int ws, int wd)
> +{
> +    if (unlikely((ctx->hflags & MIPS_HFLAG_FPU) &&
> +                 !(ctx->hflags & MIPS_HFLAG_F64))) {
> +        generate_exception(ctx, EXCP_RI);
> +        return 0;
> +    }
> +
> +    if (unlikely(!(ctx->hflags & MIPS_HFLAG_MSA))) {
> +        if (ctx->insn_flags & ASE_MSA) {
> +            generate_exception(ctx, EXCP_MSADIS);
> +            return 0;
> +        } else {
> +            generate_exception(ctx, EXCP_RI);
> +            return 0;
> +        }
> +    }
> +
> +    if (env->active_msa.msair & MSAIR_WRP_BIT) {
> +        int curr_request  = 0;
> +        if (wd != -1) {
> +            curr_request |= (1 << wd);
> +        }
> +        if (wt != -1) {
> +            curr_request |= (1 << wt);
> +        }
> +        if (ws != -1) {
> +            curr_request |= (1 << ws);
> +        }
> +        env->active_msa.msarequest = curr_request
> +                & (~env->active_msa.msaaccess | env->active_msa.msasave);
> +        if (unlikely(env->active_msa.msarequest != 0)) {

Are you sure it's safe to access env here during code generation? How do
you guarantee the values at translation time match the values at run time?

> +            generate_exception(ctx, EXCP_MSADIS);
> +            return 0;
> +        }
> +    }
> +    return 1;
> +}

newline between functions?

> diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c
> index 29dc2ef..9e0f67b 100644
> --- a/target-mips/translate_init.c
> +++ b/target-mips/translate_init.c
> @@ -688,3 +688,48 @@ static void mvp_init (CPUMIPSState *env, const mips_def_t *def)
>                               (0x0 << CP0MVPC1_PCX) | (0x0 << CP0MVPC1_PCP2) |
>                               (0x1 << CP0MVPC1_PCP1);
>  }
> +
> +static void msa_reset(CPUMIPSState *env)
> +{
> +#ifdef CONFIG_USER_ONLY
> +    /* MSA access enabled */
> +    env->CP0_Config5 |= 1 << CP0C5_MSAEn;
> +
> +    /* DSP and CP1 enabled, 64-bit FPRs */
> +    env->CP0_Status |= (1 << CP0St_MX);
> +    env->hflags |= MIPS_HFLAG_DSP;

why do you enable DSP?

> +    env->CP0_Status |= (1 << CP0St_CU1) | (1 << CP0St_FR);
> +    env->hflags |= MIPS_HFLAG_F64 | MIPS_HFLAG_COP1X;

shouldn't that depend on the program being loaded, and whether it's
built for fp32 or fp64?

> +#endif
> +
> +    /* Vector register partitioning not implemented */
> +    env->active_msa.msair = 0;
> +    env->active_msa.msaaccess  = 0xffffffff;

the reset state is 0 according to the manual. Maybe this should depend
on CONFIG_USER_ONLY.

Cheers
James

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 08/20] target-mips: add msa_helper.c
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (6 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 07/20] target-mips: add msa_reset(), global msa register Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-10-10  9:27   ` Leon Alrae
  2014-10-22 15:29   ` James Hogan
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 09/20] target-mips: add MSA branch instructions Yongbok Kim
                   ` (11 subsequent siblings)
  19 siblings, 2 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add msa_helper.c

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/Makefile.objs |    2 +-
 target-mips/msa_helper.c  |  196 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 197 insertions(+), 1 deletions(-)
 create mode 100644 target-mips/msa_helper.c

diff --git a/target-mips/Makefile.objs b/target-mips/Makefile.objs
index 716244f..108fd9b 100644
--- a/target-mips/Makefile.objs
+++ b/target-mips/Makefile.objs
@@ -1,4 +1,4 @@
 obj-y += translate.o dsp_helper.o op_helper.o lmi_helper.o helper.o cpu.o
-obj-y += gdbstub.o
+obj-y += gdbstub.o msa_helper.o
 obj-$(CONFIG_SOFTMMU) += machine.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
new file mode 100644
index 0000000..5afc9ae
--- /dev/null
+++ b/target-mips/msa_helper.c
@@ -0,0 +1,196 @@
+/*
+ * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
+ *
+ * Copyright (c) 2014 Imagination Technologies
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "exec/helper-proto.h"
+
+#define DF_BYTE   0
+#define DF_HALF   1
+#define DF_WORD   2
+#define DF_DOUBLE 3
+
+static void msa_check_index(CPUMIPSState *env,
+        uint32_t df, uint32_t n) { /* raises EXCP_RI if n is out of range */
+    switch (df) {
+    case DF_BYTE: /* b */
+        if (n > MSA_WRLEN / 8 - 1) { /* last valid index for 8-bit lanes */
+            helper_raise_exception(env, EXCP_RI);
+        }
+        break;
+    case DF_HALF: /* h */
+        if (n > MSA_WRLEN / 16 - 1) {
+            helper_raise_exception(env, EXCP_RI);
+        }
+        break;
+    case DF_WORD: /* w */
+        if (n > MSA_WRLEN / 32 - 1) {
+            helper_raise_exception(env, EXCP_RI);
+        }
+        break;
+    case DF_DOUBLE: /* d */
+        if (n > MSA_WRLEN / 64 - 1) {
+            helper_raise_exception(env, EXCP_RI);
+        }
+        break;
+    default:
+        /* shouldn't get here: df is expected to be one of the DF_* values */
+        assert(0);
+    }
+}
+
+/* Data format min and max values; df is DF_BYTE..DF_DOUBLE, m a bit width */
+#define DF_BITS(df) (1 << ((df) + 3))
+
+#define DF_MAX_INT(df)  ((int64_t)((1ULL << (DF_BITS(df) - 1)) - 1))
+#define M_MAX_INT(m)    ((int64_t)((1ULL << ((m)         - 1)) - 1))
+
+#define DF_MIN_INT(df)  (-DF_MAX_INT(df) - 1) /* avoids -(1LL << 63) */
+#define M_MIN_INT(m)    (-M_MAX_INT(m)   - 1)
+
+#define DF_MAX_UINT(df) ((uint64_t)(-1ULL >> (64 - DF_BITS(df))))
+#define M_MAX_UINT(m)   ((uint64_t)(-1ULL >> (64 - (m))))
+
+/* Data format bit position and unsigned values */
+#define BIT_POSITION(x, df) ((uint64_t)(x) % DF_BITS(df))
+
+#define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
+#define SIGNED(x, df)  /* shift left as unsigned, then arithmetic right */ \
+    ((int64_t)((uint64_t)(x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))
+
+/* Element access; ALL_x_ELEMENTS { } DONE_ALL_ELEMENTS iterates downward */
+#define DF_ELEMENTS(df, wrlen) ((wrlen) / DF_BITS(df))
+
+#define  B(pwr, i) (((wr_t *)(pwr))->b[i])
+#define BR(pwr, i) (((wr_t *)(pwr))->b[i])
+#define BL(pwr, i) (((wr_t *)(pwr))->b[(i) + MSA_WRLEN/16])
+
+#define ALL_B_ELEMENTS(i, wrlen)                \
+    do {                                        \
+        uint32_t i;                             \
+        for (i = (wrlen) / 8; i--;)
+
+#define  H(pwr, i) (((wr_t *)(pwr))->h[i])
+#define HR(pwr, i) (((wr_t *)(pwr))->h[i])
+#define HL(pwr, i) (((wr_t *)(pwr))->h[(i) + MSA_WRLEN/32])
+
+#define ALL_H_ELEMENTS(i, wrlen)                \
+    do {                                        \
+        uint32_t i;                             \
+        for (i = (wrlen) / 16; i--;)
+
+#define  W(pwr, i) (((wr_t *)(pwr))->w[i])
+#define WR(pwr, i) (((wr_t *)(pwr))->w[i])
+#define WL(pwr, i) (((wr_t *)(pwr))->w[(i) + MSA_WRLEN/64])
+
+#define ALL_W_ELEMENTS(i, wrlen)                \
+    do {                                        \
+        uint32_t i;                             \
+        for (i = (wrlen) / 32; i--;)
+
+#define  D(pwr, i) (((wr_t *)(pwr))->d[i])
+#define DR(pwr, i) (((wr_t *)(pwr))->d[i])
+#define DL(pwr, i) (((wr_t *)(pwr))->d[(i) + MSA_WRLEN/128])
+
+#define ALL_D_ELEMENTS(i, wrlen)                \
+    do {                                        \
+        uint32_t i;                             \
+        for (i = (wrlen) / 64; i--;)
+
+#define Q(pwr, i) (((wr_t *)(pwr))->q[i])
+#define ALL_Q_ELEMENTS(i, wrlen)                \
+    do {                                        \
+        uint32_t i;                             \
+        for (i = (wrlen) / 128; i--;)
+
+#define DONE_ALL_ELEMENTS                       \
+    } while (0)
+
+static inline void msa_move_v(void *pwd, void *pws)
+{ /* copies all MSA_WRLEN / 64 d[] elements from *pws to *pwd */
+    ALL_D_ELEMENTS(i, MSA_WRLEN) {
+        D(pwd, i) = D(pws, i);
+    } DONE_ALL_ELEMENTS;
+}
+
+static inline uint64_t msa_load_wr_elem_i64(CPUMIPSState *env, int32_t wreg,
+        int32_t df, int32_t i)
+{ /* returns element i (mod element count) of wreg, zero-extended */
+    i %= DF_ELEMENTS(df, MSA_WRLEN);
+    msa_check_index(env, (uint32_t)df, (uint32_t)i);
+
+    switch (df) {
+    case DF_BYTE: /* b */
+        return (uint8_t)env->active_fpu.fpr[wreg].wr.b[i];
+    case DF_HALF: /* h */
+        return (uint16_t)env->active_fpu.fpr[wreg].wr.h[i];
+    case DF_WORD: /* w */
+        return (uint32_t)env->active_fpu.fpr[wreg].wr.w[i];
+    case DF_DOUBLE: /* d */
+        return (uint64_t)env->active_fpu.fpr[wreg].wr.d[i];
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+}
+
+static inline int64_t msa_load_wr_elem_s64(CPUMIPSState *env, int32_t wreg,
+        int32_t df, int32_t i)
+{ /* returns element i (mod element count) of wreg converted to int64_t */
+    i %= DF_ELEMENTS(df, MSA_WRLEN);
+    msa_check_index(env, (uint32_t)df, (uint32_t)i);
+
+    switch (df) { /* assumes wr_t lanes are signed types - TODO confirm */
+    case DF_BYTE: /* b */
+        return env->active_fpu.fpr[wreg].wr.b[i];
+    case DF_HALF: /* h */
+        return env->active_fpu.fpr[wreg].wr.h[i];
+    case DF_WORD: /* w */
+        return env->active_fpu.fpr[wreg].wr.w[i];
+    case DF_DOUBLE: /* d */
+        return env->active_fpu.fpr[wreg].wr.d[i];
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+}
+
+static inline void msa_store_wr_elem(CPUMIPSState *env, uint64_t val,
+        int32_t wreg, int32_t df, int32_t i)
+{ /* stores val, truncated to the df width, into element i of wreg */
+    i %= DF_ELEMENTS(df, MSA_WRLEN);
+    msa_check_index(env, (uint32_t)df, (uint32_t)i);
+
+    switch (df) {
+    case DF_BYTE: /* b */
+        env->active_fpu.fpr[wreg].wr.b[i] = (uint8_t)val;
+        break;
+    case DF_HALF: /* h */
+        env->active_fpu.fpr[wreg].wr.h[i] = (uint16_t)val;
+        break;
+    case DF_WORD: /* w */
+        env->active_fpu.fpr[wreg].wr.w[i] = (uint32_t)val;
+        break;
+    case DF_DOUBLE: /* d */
+        env->active_fpu.fpr[wreg].wr.d[i] = (uint64_t)val;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+}
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 08/20] target-mips: add msa_helper.c
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 08/20] target-mips: add msa_helper.c Yongbok Kim
@ 2014-10-10  9:27   ` Leon Alrae
  2014-10-22 15:29   ` James Hogan
  1 sibling, 0 replies; 35+ messages in thread
From: Leon Alrae @ 2014-10-10  9:27 UTC (permalink / raw)
  To: Yongbok Kim, qemu-devel; +Cc: cristian.cuna, aurelien

On 14/07/2014 10:55, Yongbok Kim wrote:
> add msa_helper.c
> 
> Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
> ---
>  target-mips/Makefile.objs |    2 +-
>  target-mips/msa_helper.c  |  196 +++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 197 insertions(+), 1 deletions(-)
>  create mode 100644 target-mips/msa_helper.c
> 
> diff --git a/target-mips/Makefile.objs b/target-mips/Makefile.objs
> index 716244f..108fd9b 100644
> --- a/target-mips/Makefile.objs
> +++ b/target-mips/Makefile.objs
> @@ -1,4 +1,4 @@
>  obj-y += translate.o dsp_helper.o op_helper.o lmi_helper.o helper.o cpu.o
> -obj-y += gdbstub.o
> +obj-y += gdbstub.o msa_helper.o
>  obj-$(CONFIG_SOFTMMU) += machine.o
>  obj-$(CONFIG_KVM) += kvm.o
> diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
> new file mode 100644
> index 0000000..5afc9ae
> --- /dev/null
> +++ b/target-mips/msa_helper.c
> @@ -0,0 +1,196 @@
> +/*
> + * MIPS SIMD Architecture Module Instruction emulation helpers for QEMU.
> + *
> + * Copyright (c) 2014 Imagination Technologies
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include "cpu.h"
> +#include "exec/helper-proto.h"
> +
> +#define DF_BYTE   0
> +#define DF_HALF   1
> +#define DF_WORD   2
> +#define DF_DOUBLE 3

Enum probably would be nicer as we have a few related constants.

> +
> +static void msa_check_index(CPUMIPSState *env,
> +        uint32_t df, uint32_t n) {
> +    switch (df) {
> +    case DF_BYTE: /* b */
> +        if (n > MSA_WRLEN / 8 - 1) {
> +            helper_raise_exception(env, EXCP_RI);
> +        }
> +        break;
> +    case DF_HALF: /* h */
> +        if (n > MSA_WRLEN / 16 - 1) {
> +            helper_raise_exception(env, EXCP_RI);
> +        }
> +        break;
> +    case DF_WORD: /* w */
> +        if (n > MSA_WRLEN / 32 - 1) {
> +            helper_raise_exception(env, EXCP_RI);
> +        }
> +        break;
> +    case DF_DOUBLE: /* d */
> +        if (n > MSA_WRLEN / 64 - 1) {
> +            helper_raise_exception(env, EXCP_RI);
> +        }
> +        break;
> +    default:
> +        /* shouldn't get here */
> +        assert(0);
> +    }
> +}

I cannot find any place where msa_check_index would be useful. What I
can see however, is that in some msa instructions this is called 3 times
for each vector element. Please remove it.

> +
> +/* Data format min and max values */
> +#define DF_BITS(df) (1 << ((df) + 3))
> +
> +#define DF_MAX_INT(df)  (int64_t)((1LL << (DF_BITS(df) - 1)) - 1)
> +#define M_MAX_INT(m)    (int64_t)((1LL << ((m)         - 1)) - 1)
> +
> +#define DF_MIN_INT(df)  (int64_t)(-(1LL << (DF_BITS(df) - 1)))
> +#define M_MIN_INT(m)    (int64_t)(-(1LL << ((m)         - 1)))
> +
> +#define DF_MAX_UINT(df) (uint64_t)(-1ULL >> (64 - DF_BITS(df)))
> +#define M_MAX_UINT(m)   (uint64_t)(-1ULL >> (64 - (m)))
> +
> +/* Data format bit position and unsigned values */
> +#define BIT_POSITION(x, df) ((uint64_t)(x) % DF_BITS(df))
> +
> +#define UNSIGNED(x, df) ((x) & DF_MAX_UINT(df))
> +#define SIGNED(x, df)                                                   \
> +    ((((int64_t)x) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df)))
> +
> +/* Element-by-element access macros */
> +#define DF_ELEMENTS(df, wrlen) (wrlen / DF_BITS(df))

wrlen as an input argument is not needed as you are always referring to
the vector register size, i.e. MSA_WRLEN.

> +
> +#define  B(pwr, i) (((wr_t *)pwr)->b[i])
> +#define BR(pwr, i) (((wr_t *)pwr)->b[i])
> +#define BL(pwr, i) (((wr_t *)pwr)->b[i + MSA_WRLEN/16])

Replace void* with wr_t* in helpers instead of casting the type here.

> +
> +#define ALL_B_ELEMENTS(i, wrlen)                \
> +    do {                                        \
> +        uint32_t i;                             \
> +        for (i = wrlen / 8; i--;)
> +
> +#define  H(pwr, i) (((wr_t *)pwr)->h[i])
> +#define HR(pwr, i) (((wr_t *)pwr)->h[i])
> +#define HL(pwr, i) (((wr_t *)pwr)->h[i + MSA_WRLEN/32])
> +
> +#define ALL_H_ELEMENTS(i, wrlen)                \
> +    do {                                        \
> +        uint32_t i;                             \
> +        for (i = wrlen / 16; i--;)
> +
> +#define  W(pwr, i) (((wr_t *)pwr)->w[i])
> +#define WR(pwr, i) (((wr_t *)pwr)->w[i])
> +#define WL(pwr, i) (((wr_t *)pwr)->w[i + MSA_WRLEN/64])
> +
> +#define ALL_W_ELEMENTS(i, wrlen)                \
> +    do {                                        \
> +        uint32_t i;                             \
> +        for (i = wrlen / 32; i--;)
> +
> +#define  D(pwr, i) (((wr_t *)pwr)->d[i])
> +#define DR(pwr, i) (((wr_t *)pwr)->d[i])
> +#define DL(pwr, i) (((wr_t *)pwr)->d[i + MSA_WRLEN/128])
> +
> +#define ALL_D_ELEMENTS(i, wrlen)                \
> +    do {                                        \
> +        uint32_t i;                             \
> +        for (i = wrlen / 64; i--;)
> +
> +#define Q(pwr, i) (((wr_t *)pwr)->q[i])
> +#define ALL_Q_ELEMENTS(i, wrlen)                \
> +    do {                                        \
> +        uint32_t i;                             \
> +        for (i = wrlen / 128; i--;)
> +
> +#define DONE_ALL_ELEMENTS                       \
> +    } while (0)

I don't see any benefit from using ALL_*_ELEMENTS and DONE_ALL_ELEMENTS.
Wouldn't it be better just to use regular for statements?

> +
> +static inline void msa_move_v(void *pwd, void *pws)
> +{
> +    ALL_D_ELEMENTS(i, MSA_WRLEN) {
> +        D(pwd, i) = D(pws, i);
> +    } DONE_ALL_ELEMENTS;
> +}
> +
> +static inline uint64_t msa_load_wr_elem_i64(CPUMIPSState *env, int32_t wreg,
> +        int32_t df, int32_t i)
> +{
> +    i %= DF_ELEMENTS(df, MSA_WRLEN);
> +    msa_check_index(env, (uint32_t)df, (uint32_t)i);
msa_check_index not needed.

> +
> +    switch (df) {
> +    case DF_BYTE: /* b */

The comment is redundant, data format is already indicated by DF_BYTE.
The same applies in other places.

> +        return (uint8_t)env->active_fpu.fpr[wreg].wr.b[i];
> +    case DF_HALF: /* h */
> +        return (uint16_t)env->active_fpu.fpr[wreg].wr.h[i];
> +    case DF_WORD: /* w */
> +        return (uint32_t)env->active_fpu.fpr[wreg].wr.w[i];
> +    case DF_DOUBLE: /* d */
> +        return (uint64_t)env->active_fpu.fpr[wreg].wr.d[i];
> +    default:
> +        /* shouldn't get here */
> +        assert(0);
> +    }
> +}
> +
> +static inline int64_t msa_load_wr_elem_s64(CPUMIPSState *env, int32_t wreg,
> +        int32_t df, int32_t i)
> +{
> +    i %= DF_ELEMENTS(df, MSA_WRLEN);
> +    msa_check_index(env, (uint32_t)df, (uint32_t)i);
> +
> +    switch (df) {
> +    case DF_BYTE: /* b */
> +        return env->active_fpu.fpr[wreg].wr.b[i];
> +    case DF_HALF: /* h */
> +        return env->active_fpu.fpr[wreg].wr.h[i];
> +    case DF_WORD: /* w */
> +        return env->active_fpu.fpr[wreg].wr.w[i];
> +    case DF_DOUBLE: /* d */
> +        return env->active_fpu.fpr[wreg].wr.d[i];
> +    default:
> +        /* shouldn't get here */
> +        assert(0);
> +    }
> +}
> +
> +static inline void msa_store_wr_elem(CPUMIPSState *env, uint64_t val,
> +        int32_t wreg, int32_t df, int32_t i)
> +{
> +    i %= DF_ELEMENTS(df, MSA_WRLEN);
> +    msa_check_index(env, (uint32_t)df, (uint32_t)i);
> +
> +    switch (df) {
> +    case DF_BYTE: /* b */
> +        env->active_fpu.fpr[wreg].wr.b[i] = (uint8_t)val;
> +        break;
> +    case DF_HALF: /* h */
> +        env->active_fpu.fpr[wreg].wr.h[i] = (uint16_t)val;
> +        break;
> +    case DF_WORD: /* w */
> +        env->active_fpu.fpr[wreg].wr.w[i] = (uint32_t)val;
> +        break;
> +    case DF_DOUBLE: /* d */
> +        env->active_fpu.fpr[wreg].wr.d[i] = (uint64_t)val;
> +        break;
> +    default:
> +        /* shouldn't get here */
> +        assert(0);
> +    }
> +}
> 

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 08/20] target-mips: add msa_helper.c
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 08/20] target-mips: add msa_helper.c Yongbok Kim
  2014-10-10  9:27   ` Leon Alrae
@ 2014-10-22 15:29   ` James Hogan
  1 sibling, 0 replies; 35+ messages in thread
From: James Hogan @ 2014-10-22 15:29 UTC (permalink / raw)
  To: Yongbok Kim, qemu-devel; +Cc: cristian.cuna, leon.alrae, aurelien

On 14/07/14 10:55, Yongbok Kim wrote:
> +#define  B(pwr, i) (((wr_t *)pwr)->b[i])
> +#define BR(pwr, i) (((wr_t *)pwr)->b[i])
> +#define BL(pwr, i) (((wr_t *)pwr)->b[i + MSA_WRLEN/16])

macro argument references should be enclosed in brackets really (to
avoid precedence problems).

> +
> +#define ALL_B_ELEMENTS(i, wrlen)                \
> +    do {                                        \
> +        uint32_t i;                             \
> +        for (i = wrlen / 8; i--;)

eww... there's gotta be a nicer way.

Is it really so long winded not to do directly?
	int i;
	for (i = 0; i < MSA_WRLEN/8; ++i) {

	}

compared to what you have at the moment:
	ALL_B_ELEMENTS(i, MSA_WRLEN) {

	} DONE_ALL_ELEMENTS;

It would be much more familiar/readable, and the ordering is explicit
too (just in case it matters for any vector operations when source ==
destination)

> +static inline void msa_move_v(void *pwd, void *pws)

why not s/void/wr_t/?

You could then presumably do *pwd = *pws

> +{
> +    ALL_D_ELEMENTS(i, MSA_WRLEN) {
> +        D(pwd, i) = D(pws, i);
> +    } DONE_ALL_ELEMENTS;
> +}

Cheers
James

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 09/20] target-mips: add MSA branch instructions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (7 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 08/20] target-mips: add msa_helper.c Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-10-10 14:13   ` Leon Alrae
  2014-10-28 23:05   ` James Hogan
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 10/20] target-mips: add MSA I8 format instructions Yongbok Kim
                   ` (10 subsequent siblings)
  19 siblings, 2 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA branch instructions

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/translate.c |  107 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/target-mips/translate.c b/target-mips/translate.c
index b8dbbdc..0bfbcfe 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -14688,6 +14688,95 @@ static inline int check_msa_access(CPUMIPSState *env, DisasContext *ctx,
     }
     return 1;
 }
+
+static void determ_zero_element(TCGv tresult, uint8_t df, uint8_t wt)
+{
+    /* Note this function only works with MSA_WRLEN = 128 */
+    uint64_t eval_zero_or_big;
+    uint64_t eval_big;
+    switch (df) {
+    case 0: /*DF_BYTE*/
+        eval_zero_or_big = 0x0101010101010101ULL;
+        eval_big = 0x8080808080808080ULL;
+        break;
+    case 1: /*DF_HALF*/
+        eval_zero_or_big = 0x0001000100010001ULL;
+        eval_big = 0x8000800080008000ULL;
+        break;
+    case 2: /*DF_WORD*/
+        eval_zero_or_big = 0x0000000100000001ULL;
+        eval_big = 0x8000000080000000ULL;
+        break;
+    case 3: /*DF_DOUBLE*/
+        eval_zero_or_big = 0x0000000000000001ULL;
+        eval_big = 0x8000000000000000ULL;
+        break;
+    }
+    TCGv_i64 t0 = tcg_temp_local_new_i64();
+    TCGv_i64 t1 = tcg_temp_local_new_i64();
+    tcg_gen_subi_i64(t0, msa_wr_d[wt<<1], eval_zero_or_big);
+    tcg_gen_andc_i64(t0, t0, msa_wr_d[wt<<1]);
+    tcg_gen_andi_i64(t0, t0, eval_big);
+    tcg_gen_subi_i64(t1, msa_wr_d[(wt<<1)+1], eval_zero_or_big);
+    tcg_gen_andc_i64(t1, t1, msa_wr_d[(wt<<1)+1]);
+    tcg_gen_andi_i64(t1, t1, eval_big);
+    tcg_gen_or_i64(t0, t0, t1);
+    /* if all bits is zero then all element is not zero */
+    /* if some bit is non-zero then some element is zero */
+    tcg_gen_setcondi_i64(TCG_COND_NE, t0, t0, 0);
+    tcg_gen_trunc_i64_tl(tresult, t0);
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+}
+
+static void gen_msa_branch(CPUMIPSState *env, DisasContext *ctx, uint32_t op1)
+{
+    check_insn(ctx, ASE_MSA);
+
+    if (ctx->hflags & MIPS_HFLAG_BMASK) {
+        generate_exception(ctx, EXCP_RI);
+        return;
+    }
+
+    uint8_t df = (ctx->opcode >> 21) & 0x3 /* df [22:21] */;
+    uint8_t wt = (ctx->opcode >> 16) & 0x1f /* wt [20:16] */;
+    int64_t s16 = (ctx->opcode >> 0) & 0xffff /* s16 [15:0] */;
+    s16 = (s16 << 48) >> 48; /* sign extend s16 to 64 bits*/
+
+    check_msa_access(env, ctx, wt, -1, -1);
+
+    switch (op1) {
+    case OPC_MSA_BZ_V:
+    case OPC_MSA_BNZ_V:
+        {
+            TCGv_i64 t0 = tcg_temp_local_new_i64();
+            tcg_gen_or_i64(t0, msa_wr_d[wt<<1], msa_wr_d[(wt<<1)+1]);
+            tcg_gen_setcondi_i64((op1 == OPC_MSA_BZ_V) ?
+                    TCG_COND_EQ : TCG_COND_NE, t0, t0, 0);
+            tcg_gen_trunc_i64_tl(bcond, t0);
+            tcg_temp_free_i64(t0);
+        }
+        break;
+    case OPC_MSA_BZ_B:
+    case OPC_MSA_BZ_H:
+    case OPC_MSA_BZ_W:
+    case OPC_MSA_BZ_D:
+        determ_zero_element(bcond, df, wt);
+        break;
+    case OPC_MSA_BNZ_B:
+    case OPC_MSA_BNZ_H:
+    case OPC_MSA_BNZ_W:
+    case OPC_MSA_BNZ_D:
+        determ_zero_element(bcond, df, wt);
+        tcg_gen_setcondi_tl(TCG_COND_EQ, bcond, bcond, 0);
+        break;
+    }
+
+    int64_t offset = s16 << 2;
+    ctx->btarget = ctx->pc + offset + 4;
+
+    ctx->hflags |= MIPS_HFLAG_BC;
+}
 static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
 {
     int32_t offset;
@@ -15729,9 +15818,23 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
         break;
 
     case OPC_CP1:
-        if (ctx->CP0_Config1 & (1 << CP0C1_FP)) {
+        op1 = MASK_CP1(ctx->opcode);
+
+        if ((ctx->insn_flags & ASE_MSA) &&
+                (op1 == OPC_MSA_BZ_V ||
+                 op1 == OPC_MSA_BNZ_V ||
+                 op1 == OPC_MSA_BZ_B ||
+                 op1 == OPC_MSA_BZ_H ||
+                 op1 == OPC_MSA_BZ_W ||
+                 op1 == OPC_MSA_BZ_D ||
+                 op1 == OPC_MSA_BNZ_B ||
+                 op1 == OPC_MSA_BNZ_H ||
+                 op1 == OPC_MSA_BNZ_W ||
+                 op1 == OPC_MSA_BNZ_D)) {
+            gen_msa_branch(env, ctx, op1);
+        } else if (ctx->CP0_Config1 & (1 << CP0C1_FP)) {
             check_cp1_enabled(ctx);
-            op1 = MASK_CP1(ctx->opcode);
+
             switch (op1) {
             case OPC_MFHC1:
             case OPC_MTHC1:
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 09/20] target-mips: add MSA branch instructions
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 09/20] target-mips: add MSA branch instructions Yongbok Kim
@ 2014-10-10 14:13   ` Leon Alrae
  2014-10-28 23:05   ` James Hogan
  1 sibling, 0 replies; 35+ messages in thread
From: Leon Alrae @ 2014-10-10 14:13 UTC (permalink / raw)
  To: Yongbok Kim, qemu-devel; +Cc: cristian.cuna, aurelien

On 14/07/2014 10:55, Yongbok Kim wrote:
> add MSA branch instructions
> 
> Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
> ---
>  target-mips/translate.c |  107 ++++++++++++++++++++++++++++++++++++++++++++++-
>  1 files changed, 105 insertions(+), 2 deletions(-)
> 
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index b8dbbdc..0bfbcfe 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -14688,6 +14688,95 @@ static inline int check_msa_access(CPUMIPSState *env, DisasContext *ctx,
>      }
>      return 1;
>  }
> +
> +static void determ_zero_element(TCGv tresult, uint8_t df, uint8_t wt)
> +{

nit: if you add gen_ prefix to this function name it will be clear that
it generates tcg operations without having to look at the body

> +    /* Note this function only works with MSA_WRLEN = 128 */
> +    uint64_t eval_zero_or_big;
> +    uint64_t eval_big;
> +    switch (df) {
> +    case 0: /*DF_BYTE*/

why not use DF_BYTE, DF_HALF etc. directly rather than putting them in
comments?

> +        eval_zero_or_big = 0x0101010101010101ULL;
> +        eval_big = 0x8080808080808080ULL;
> +        break;
> +    case 1: /*DF_HALF*/
> +        eval_zero_or_big = 0x0001000100010001ULL;
> +        eval_big = 0x8000800080008000ULL;
> +        break;
> +    case 2: /*DF_WORD*/
> +        eval_zero_or_big = 0x0000000100000001ULL;
> +        eval_big = 0x8000000080000000ULL;
> +        break;
> +    case 3: /*DF_DOUBLE*/
> +        eval_zero_or_big = 0x0000000000000001ULL;
> +        eval_big = 0x8000000000000000ULL;
> +        break;
> +    }
> +    TCGv_i64 t0 = tcg_temp_local_new_i64();
> +    TCGv_i64 t1 = tcg_temp_local_new_i64();

local temps aren't needed here, normal temps would be sufficient

> +    tcg_gen_subi_i64(t0, msa_wr_d[wt<<1], eval_zero_or_big);
> +    tcg_gen_andc_i64(t0, t0, msa_wr_d[wt<<1]);
> +    tcg_gen_andi_i64(t0, t0, eval_big);
> +    tcg_gen_subi_i64(t1, msa_wr_d[(wt<<1)+1], eval_zero_or_big);
> +    tcg_gen_andc_i64(t1, t1, msa_wr_d[(wt<<1)+1]);
> +    tcg_gen_andi_i64(t1, t1, eval_big);
> +    tcg_gen_or_i64(t0, t0, t1);
> +    /* if all bits is zero then all element is not zero */
> +    /* if some bit is non-zero then some element is zero */
> +    tcg_gen_setcondi_i64(TCG_COND_NE, t0, t0, 0);
> +    tcg_gen_trunc_i64_tl(tresult, t0);
> +    tcg_temp_free_i64(t0);
> +    tcg_temp_free_i64(t1);
> +}
> +
> +static void gen_msa_branch(CPUMIPSState *env, DisasContext *ctx, uint32_t op1)
> +{
> +    check_insn(ctx, ASE_MSA);
> +
> +    if (ctx->hflags & MIPS_HFLAG_BMASK) {
> +        generate_exception(ctx, EXCP_RI);
> +        return;
> +    }
> +
> +    uint8_t df = (ctx->opcode >> 21) & 0x3 /* df [22:21] */;
> +    uint8_t wt = (ctx->opcode >> 16) & 0x1f /* wt [20:16] */;
> +    int64_t s16 = (ctx->opcode >> 0) & 0xffff /* s16 [15:0] */;
> +    s16 = (s16 << 48) >> 48; /* sign extend s16 to 64 bits*/

int64_t s16 = (int16_t)ctx->opcode :)

Also, I think in QEMU it's preferable to have declarations at the
beginning of a block

> +
> +    check_msa_access(env, ctx, wt, -1, -1);
> +
> +    switch (op1) {
> +    case OPC_MSA_BZ_V:
> +    case OPC_MSA_BNZ_V:
> +        {
> +            TCGv_i64 t0 = tcg_temp_local_new_i64();
> +            tcg_gen_or_i64(t0, msa_wr_d[wt<<1], msa_wr_d[(wt<<1)+1]);
> +            tcg_gen_setcondi_i64((op1 == OPC_MSA_BZ_V) ?
> +                    TCG_COND_EQ : TCG_COND_NE, t0, t0, 0);
> +            tcg_gen_trunc_i64_tl(bcond, t0);
> +            tcg_temp_free_i64(t0);
> +        }
> +        break;
> +    case OPC_MSA_BZ_B:
> +    case OPC_MSA_BZ_H:
> +    case OPC_MSA_BZ_W:
> +    case OPC_MSA_BZ_D:
> +        determ_zero_element(bcond, df, wt);
> +        break;
> +    case OPC_MSA_BNZ_B:
> +    case OPC_MSA_BNZ_H:
> +    case OPC_MSA_BNZ_W:
> +    case OPC_MSA_BNZ_D:
> +        determ_zero_element(bcond, df, wt);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, bcond, bcond, 0);
> +        break;
> +    }
> +
> +    int64_t offset = s16 << 2;
> +    ctx->btarget = ctx->pc + offset + 4;
> +
> +    ctx->hflags |= MIPS_HFLAG_BC;
> +}
>  static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
>  {
>      int32_t offset;
> @@ -15729,9 +15818,23 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
>          break;
>  
>      case OPC_CP1:
> -        if (ctx->CP0_Config1 & (1 << CP0C1_FP)) {
> +        op1 = MASK_CP1(ctx->opcode);
> +
> +        if ((ctx->insn_flags & ASE_MSA) &&
> +                (op1 == OPC_MSA_BZ_V ||
> +                 op1 == OPC_MSA_BNZ_V ||
> +                 op1 == OPC_MSA_BZ_B ||
> +                 op1 == OPC_MSA_BZ_H ||
> +                 op1 == OPC_MSA_BZ_W ||
> +                 op1 == OPC_MSA_BZ_D ||
> +                 op1 == OPC_MSA_BNZ_B ||
> +                 op1 == OPC_MSA_BNZ_H ||
> +                 op1 == OPC_MSA_BNZ_W ||
> +                 op1 == OPC_MSA_BNZ_D)) {
> +            gen_msa_branch(env, ctx, op1);

can't this be merged into the switch below?

> +        } else if (ctx->CP0_Config1 & (1 << CP0C1_FP)) {
>              check_cp1_enabled(ctx);
> -            op1 = MASK_CP1(ctx->opcode);
> +
>              switch (op1) {
>              case OPC_MFHC1:
>              case OPC_MTHC1:
> 

Regards,
Leon

^ permalink raw reply	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 09/20] target-mips: add MSA branch instructions
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 09/20] target-mips: add MSA branch instructions Yongbok Kim
  2014-10-10 14:13   ` Leon Alrae
@ 2014-10-28 23:05   ` James Hogan
  1 sibling, 0 replies; 35+ messages in thread
From: James Hogan @ 2014-10-28 23:05 UTC (permalink / raw)
  To: Yongbok Kim; +Cc: cristian.cuna, leon.alrae, qemu-devel, aurelien

Hi Yongbok,

I know you're preparing another patchset, but thought I may as well
continue reviewing this patchset until that one lands, sorry it's taken
me a while to get round to it.

On Mon, Jul 14, 2014 at 10:55:52AM +0100, Yongbok Kim wrote:
> +static void determ_zero_element(TCGv tresult, uint8_t df, uint8_t wt)
> +{
> +    /* Note this function only works with MSA_WRLEN = 128 */

I reckon a quick comment to explain what this function is doing wouldn't
hurt, since it's not obvious from the name.

I'm guessing from knowledge of MSA branches it determines whether there
is a zero element.

> +    uint64_t eval_zero_or_big;
> +    uint64_t eval_big;
> +    switch (df) {
> +    case 0: /*DF_BYTE*/
> +        eval_zero_or_big = 0x0101010101010101ULL;
> +        eval_big = 0x8080808080808080ULL;
> +        break;
> +    case 1: /*DF_HALF*/
> +        eval_zero_or_big = 0x0001000100010001ULL;
> +        eval_big = 0x8000800080008000ULL;
> +        break;
> +    case 2: /*DF_WORD*/
> +        eval_zero_or_big = 0x0000000100000001ULL;
> +        eval_big = 0x8000000080000000ULL;
> +        break;
> +    case 3: /*DF_DOUBLE*/
> +        eval_zero_or_big = 0x0000000000000001ULL;
> +        eval_big = 0x8000000000000000ULL;
> +        break;
> +    }
> +    TCGv_i64 t0 = tcg_temp_local_new_i64();
> +    TCGv_i64 t1 = tcg_temp_local_new_i64();

Variable declarations after code

These don't need preserving over any branches, so presumably they don't
need to be local temps.

> +    tcg_gen_subi_i64(t0, msa_wr_d[wt<<1], eval_zero_or_big);
> +    tcg_gen_andc_i64(t0, t0, msa_wr_d[wt<<1]);
> +    tcg_gen_andi_i64(t0, t0, eval_big);
> +    tcg_gen_subi_i64(t1, msa_wr_d[(wt<<1)+1], eval_zero_or_big);
> +    tcg_gen_andc_i64(t1, t1, msa_wr_d[(wt<<1)+1]);
> +    tcg_gen_andi_i64(t1, t1, eval_big);
> +    tcg_gen_or_i64(t0, t0, t1);
> +    /* if all bits is zero then all element is not zero */

nit: s/is/are/, s/element/elements/

> +    /* if some bit is non-zero then some element is zero */
> +    tcg_gen_setcondi_i64(TCG_COND_NE, t0, t0, 0);
> +    tcg_gen_trunc_i64_tl(tresult, t0);
> +    tcg_temp_free_i64(t0);
> +    tcg_temp_free_i64(t1);
> +}
> +
> +static void gen_msa_branch(CPUMIPSState *env, DisasContext *ctx, uint32_t op1)
> +{
> +    check_insn(ctx, ASE_MSA);
> +
> +    if (ctx->hflags & MIPS_HFLAG_BMASK) {
> +        generate_exception(ctx, EXCP_RI);
> +        return;
> +    }
> +
> +    uint8_t df = (ctx->opcode >> 21) & 0x3 /* df [22:21] */;
> +    uint8_t wt = (ctx->opcode >> 16) & 0x1f /* wt [20:16] */;

The TRM on public website is wrong about the branch encoding! :-P

> +    int64_t s16 = (ctx->opcode >> 0) & 0xffff /* s16 [15:0] */;
> +    s16 = (s16 << 48) >> 48; /* sign extend s16 to 64 bits*/

declarations after code

why not just use int16_t and let the compiler worry about sign
extension?

> +
> +    check_msa_access(env, ctx, wt, -1, -1);
> +
> +    switch (op1) {
> +    case OPC_MSA_BZ_V:
> +    case OPC_MSA_BNZ_V:
> +        {
> +            TCGv_i64 t0 = tcg_temp_local_new_i64();

I don't think this needs to be a local

> +            tcg_gen_or_i64(t0, msa_wr_d[wt<<1], msa_wr_d[(wt<<1)+1]);
> +            tcg_gen_setcondi_i64((op1 == OPC_MSA_BZ_V) ?
> +                    TCG_COND_EQ : TCG_COND_NE, t0, t0, 0);
> +            tcg_gen_trunc_i64_tl(bcond, t0);
> +            tcg_temp_free_i64(t0);
> +        }
> +        break;
> +    case OPC_MSA_BZ_B:
> +    case OPC_MSA_BZ_H:
> +    case OPC_MSA_BZ_W:
> +    case OPC_MSA_BZ_D:
> +        determ_zero_element(bcond, df, wt);
> +        break;
> +    case OPC_MSA_BNZ_B:
> +    case OPC_MSA_BNZ_H:
> +    case OPC_MSA_BNZ_W:
> +    case OPC_MSA_BNZ_D:
> +        determ_zero_element(bcond, df, wt);
> +        tcg_gen_setcondi_tl(TCG_COND_EQ, bcond, bcond, 0);

Might be slightly more efficient to just get determ_zero_element to
generate the correct condition in the first place.

> +        break;
> +    }
> +
> +    int64_t offset = s16 << 2;

declaration after code

> +    ctx->btarget = ctx->pc + offset + 4;
> +
> +    ctx->hflags |= MIPS_HFLAG_BC;
> +}

blank line would be nice

>  static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
>  {
>      int32_t offset;

Cheers
James

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 10/20] target-mips: add MSA I8 format instructions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (8 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 09/20] target-mips: add MSA branch instructions Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-10-28 23:54   ` James Hogan
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 11/20] target-mips: add MSA I5 " Yongbok Kim
                   ` (9 subsequent siblings)
  19 siblings, 1 reply; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA I8 format instructions

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.h     |   11 ++++
 target-mips/msa_helper.c |  140 ++++++++++++++++++++++++++++++++++++++++++++++
 target-mips/translate.c  |   94 ++++++++++++++++++++++++++++++-
 3 files changed, 243 insertions(+), 2 deletions(-)

diff --git a/target-mips/helper.h b/target-mips/helper.h
index 74ef094..174bc62 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -689,3 +689,14 @@ DEF_HELPER_FLAGS_3(dmthlip, 0, void, tl, tl, env)
 #endif
 DEF_HELPER_FLAGS_3(wrdsp, 0, void, tl, tl, env)
 DEF_HELPER_FLAGS_2(rddsp, 0, tl, tl, env)
+
+/* MIPS SIMD Architecture */
+
+DEF_HELPER_4(msa_andi_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_bmnzi_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_bmzi_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_bseli_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_nori_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_ori_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_shf_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_xori_b, void, env, i32, i32, i32)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index 5afc9ae..2355809 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -194,3 +194,143 @@ static inline void msa_store_wr_elem(CPUMIPSState *env, uint64_t val,
         assert(0);
     }
 }
+
+void helper_msa_andi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        B(pwd, i) = B(pws, i) & i8;
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+void helper_msa_ori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        B(pwd, i) = B(pws, i) | i8;
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+void helper_msa_nori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        B(pwd, i) = ~(B(pws, i) | i8);
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+void helper_msa_xori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        B(pwd, i) = B(pws, i) ^ i8;
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+#define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
+            dest = UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
+
+void helper_msa_bmnzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        BIT_MOVE_IF_NOT_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+#define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
+            dest = UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
+
+void helper_msa_bmzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        BIT_MOVE_IF_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+#define BIT_SELECT(dest, arg1, arg2, df) \
+            dest = UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
+
+void helper_msa_bseli_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        BIT_SELECT(B(pwd, i), B(pws, i), i8, DF_BYTE);
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+#define SHF_POS(i, imm) ((i & 0xfc) + ((imm >> (2 * (i & 0x03))) & 0x03))
+
+static inline void msa_shf_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, uint32_t imm)
+{
+    wr_t wx, *pwx = &wx;
+    switch (df) {
+    case DF_BYTE:
+      ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        B(pwx, i) = B(pws, SHF_POS(i, imm));
+      } DONE_ALL_ELEMENTS;
+      break;
+    case DF_HALF:
+      ALL_H_ELEMENTS(i, MSA_WRLEN) {
+        H(pwx, i) = H(pws, SHF_POS(i, imm));
+      } DONE_ALL_ELEMENTS;
+      break;
+    case DF_WORD:
+      ALL_W_ELEMENTS(i, MSA_WRLEN) {
+        W(pwx, i) = W(pws, SHF_POS(i, imm));
+      } DONE_ALL_ELEMENTS;
+      break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    msa_move_v(pwd, pwx);
+}
+
+void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t imm)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    msa_shf_df(env, df, pwd, pws, imm);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 0bfbcfe..c241299 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -14777,6 +14777,95 @@ static void gen_msa_branch(CPUMIPSState *env, DisasContext *ctx, uint32_t op1)
 
     ctx->hflags |= MIPS_HFLAG_BC;
 }
+
+static void gen_msa_i8(CPUMIPSState *env, DisasContext *ctx)
+{
+#define MASK_MSA_I8(op)    (MASK_MSA_MINOR(op) | (op & (0x03 << 24)))
+
+    uint32_t opcode = ctx->opcode;
+
+    uint8_t i8 = (opcode >> 16) & 0xff /* i8 [23:16] */;
+    uint8_t ws = (opcode >> 11) & 0x1f /* ws [15:11] */;
+    uint8_t wd = (opcode >> 6) & 0x1f /* wd [10:6] */;
+
+    TCGv_i32 twd = tcg_const_i32(wd);
+    TCGv_i32 tws = tcg_const_i32(ws);
+    TCGv_i32 ti8 = tcg_const_i32(i8);
+
+    switch (MASK_MSA_I8(opcode)) {
+    case OPC_MSA_ANDI_B:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_andi_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_MSA_ORI_B:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_ori_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_MSA_NORI_B:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_nori_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_MSA_XORI_B:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_xori_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_MSA_BMNZI_B:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_bmnzi_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_MSA_BMZI_B:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_bmzi_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_MSA_BSELI_B:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_bseli_b(cpu_env, twd, tws, ti8);
+        break;
+    case OPC_MSA_SHF_B:
+    case OPC_MSA_SHF_H:
+    case OPC_MSA_SHF_W:
+        {
+            uint8_t df = (opcode >> 24) & 0x3;
+            if (df == 3) {
+                generate_exception(ctx, EXCP_RI);
+            } else {
+                TCGv_i32 tdf = tcg_const_i32(df);
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_shf_df(cpu_env, tdf, twd, tws, ti8);
+                tcg_temp_free_i32(tdf);
+            }
+        }
+        break;
+    default:
+        MIPS_INVAL("MSA instruction");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+
+    tcg_temp_free_i32(twd);
+    tcg_temp_free_i32(tws);
+    tcg_temp_free_i32(ti8);
+}
+
+static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
+{
+    uint32_t opcode = ctx->opcode;
+    check_insn(ctx, ASE_MSA);
+
+    switch (MASK_MSA_MINOR(opcode)) {
+    case OPC_MSA_I8_00:
+    case OPC_MSA_I8_01:
+    case OPC_MSA_I8_02:
+        gen_msa_i8(env, ctx);
+        break;
+    default:
+        MIPS_INVAL("MSA instruction");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+
+}
+
 static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
 {
     int32_t offset;
@@ -15967,9 +16056,10 @@ static void decode_opc (CPUMIPSState *env, DisasContext *ctx)
         offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
         gen_compute_branch(ctx, op, 4, rs, rt, offset, 4);
         break;
-    case OPC_MDMX:
-        check_insn(ctx, ASE_MDMX);
+    case OPC_MSA: /* OPC_MDMX */
         /* MDMX: Not implemented. */
+        gen_msa(env, ctx);
+        break;
     default:            /* Invalid */
         MIPS_INVAL("major opcode");
         generate_exception(ctx, EXCP_RI);
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* Re: [Qemu-devel] [PATCH 10/20] target-mips: add MSA I8 format instructions
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 10/20] target-mips: add MSA I8 format instructions Yongbok Kim
@ 2014-10-28 23:54   ` James Hogan
  0 siblings, 0 replies; 35+ messages in thread
From: James Hogan @ 2014-10-28 23:54 UTC (permalink / raw)
  To: Yongbok Kim; +Cc: cristian.cuna, leon.alrae, qemu-devel, aurelien

On Mon, Jul 14, 2014 at 10:55:53AM +0100, Yongbok Kim wrote:
> add MSA I8 format instructions
> 
> Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
> ---
>  target-mips/helper.h     |   11 ++++
>  target-mips/msa_helper.c |  140 ++++++++++++++++++++++++++++++++++++++++++++++
>  target-mips/translate.c  |   94 ++++++++++++++++++++++++++++++-
>  3 files changed, 243 insertions(+), 2 deletions(-)
> 
> diff --git a/target-mips/helper.h b/target-mips/helper.h
> index 74ef094..174bc62 100644
> --- a/target-mips/helper.h
> +++ b/target-mips/helper.h
> @@ -689,3 +689,14 @@ DEF_HELPER_FLAGS_3(dmthlip, 0, void, tl, tl, env)
>  #endif
>  DEF_HELPER_FLAGS_3(wrdsp, 0, void, tl, tl, env)
>  DEF_HELPER_FLAGS_2(rddsp, 0, tl, tl, env)
> +
> +/* MIPS SIMD Architecture */
> +
> +DEF_HELPER_4(msa_andi_b, void, env, i32, i32, i32)
> +DEF_HELPER_4(msa_bmnzi_b, void, env, i32, i32, i32)
> +DEF_HELPER_4(msa_bmzi_b, void, env, i32, i32, i32)
> +DEF_HELPER_4(msa_bseli_b, void, env, i32, i32, i32)
> +DEF_HELPER_4(msa_nori_b, void, env, i32, i32, i32)
> +DEF_HELPER_4(msa_ori_b, void, env, i32, i32, i32)
> +DEF_HELPER_5(msa_shf_df, void, env, i32, i32, i32, i32)
> +DEF_HELPER_4(msa_xori_b, void, env, i32, i32, i32)
> diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
> index 5afc9ae..2355809 100644
> --- a/target-mips/msa_helper.c
> +++ b/target-mips/msa_helper.c
> @@ -194,3 +194,143 @@ static inline void msa_store_wr_elem(CPUMIPSState *env, uint64_t val,
>          assert(0);
>      }
>  }
> +
> +void helper_msa_andi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
> +        uint32_t i8)
> +{
> +    void *pwd = &(env->active_fpu.fpr[wd]);
> +    void *pws = &(env->active_fpu.fpr[ws]);
> +    ALL_B_ELEMENTS(i, MSA_WRLEN) {
> +        B(pwd, i) = B(pws, i) & i8;
> +    } DONE_ALL_ELEMENTS;
> +    if (env->active_msa.msair & MSAIR_WRP_BIT) {
> +        env->active_msa.msamodify |= (1 << wd);
> +    }
> +}
> +
> +void helper_msa_ori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
> +        uint32_t i8)
> +{
> +    void *pwd = &(env->active_fpu.fpr[wd]);
> +    void *pws = &(env->active_fpu.fpr[ws]);
> +    ALL_B_ELEMENTS(i, MSA_WRLEN) {
> +        B(pwd, i) = B(pws, i) | i8;
> +    } DONE_ALL_ELEMENTS;
> +    if (env->active_msa.msair & MSAIR_WRP_BIT) {
> +        env->active_msa.msamodify |= (1 << wd);
> +    }
> +}
> +
> +void helper_msa_nori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
> +        uint32_t i8)
> +{
> +    void *pwd = &(env->active_fpu.fpr[wd]);
> +    void *pws = &(env->active_fpu.fpr[ws]);
> +    ALL_B_ELEMENTS(i, MSA_WRLEN) {
> +        B(pwd, i) = ~(B(pws, i) | i8);
> +    } DONE_ALL_ELEMENTS;
> +    if (env->active_msa.msair & MSAIR_WRP_BIT) {
> +        env->active_msa.msamodify |= (1 << wd);
> +    }
> +}
> +
> +void helper_msa_xori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
> +        uint32_t i8)
> +{
> +    void *pwd = &(env->active_fpu.fpr[wd]);
> +    void *pws = &(env->active_fpu.fpr[ws]);
> +    ALL_B_ELEMENTS(i, MSA_WRLEN) {
> +        B(pwd, i) = B(pws, i) ^ i8;
> +    } DONE_ALL_ELEMENTS;
> +    if (env->active_msa.msair & MSAIR_WRP_BIT) {
> +        env->active_msa.msamodify |= (1 << wd);
> +    }
> +}
> +
> +#define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
> +            dest = UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
> +
> +void helper_msa_bmnzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
> +        uint32_t i8)
> +{
> +    void *pwd = &(env->active_fpu.fpr[wd]);
> +    void *pws = &(env->active_fpu.fpr[ws]);
> +    ALL_B_ELEMENTS(i, MSA_WRLEN) {
> +        BIT_MOVE_IF_NOT_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
> +    } DONE_ALL_ELEMENTS;
> +    if (env->active_msa.msair & MSAIR_WRP_BIT) {
> +        env->active_msa.msamodify |= (1 << wd);
> +    }
> +}
> +
> +#define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
> +            dest = UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
> +
> +void helper_msa_bmzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
> +        uint32_t i8)
> +{
> +    void *pwd = &(env->active_fpu.fpr[wd]);
> +    void *pws = &(env->active_fpu.fpr[ws]);
> +    ALL_B_ELEMENTS(i, MSA_WRLEN) {
> +        BIT_MOVE_IF_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
> +    } DONE_ALL_ELEMENTS;
> +    if (env->active_msa.msair & MSAIR_WRP_BIT) {
> +        env->active_msa.msamodify |= (1 << wd);
> +    }
> +}
> +
> +#define BIT_SELECT(dest, arg1, arg2, df) \
> +            dest = UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
> +
> +void helper_msa_bseli_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
> +        uint32_t i8)
> +{
> +    void *pwd = &(env->active_fpu.fpr[wd]);
> +    void *pws = &(env->active_fpu.fpr[ws]);
> +    ALL_B_ELEMENTS(i, MSA_WRLEN) {
> +        BIT_SELECT(B(pwd, i), B(pws, i), i8, DF_BYTE);
> +    } DONE_ALL_ELEMENTS;
> +    if (env->active_msa.msair & MSAIR_WRP_BIT) {
> +        env->active_msa.msamodify |= (1 << wd);
> +    }
> +}

I reckon the functions above could all be done easily enough in TCG by
repeating i8 up to 64-bits (at translation time) and doing the
operations on 64-bit quantities. Out of interest, was there a particular
motivation to do it with helpers?

In any case, that can always be an experiment for a later patch, and it
all looks technically correct.

Reviewed-by: James Hogan <james.hogan@imgtec.com>

Cheers
James

^ permalink raw reply	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 11/20] target-mips: add MSA I5 format instructions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (9 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 10/20] target-mips: add MSA I8 format instructions Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 12/20] target-mips: add MSA BIT " Yongbok Kim
                   ` (8 subsequent siblings)
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA I5 format instructions:

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.h     |   12 ++
 target-mips/msa_helper.c |  273 ++++++++++++++++++++++++++++++++++++++++++++++
 target-mips/translate.c  |   91 +++++++++++++++
 3 files changed, 376 insertions(+), 0 deletions(-)

diff --git a/target-mips/helper.h b/target-mips/helper.h
index 174bc62..fe0cf48 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -692,11 +692,23 @@ DEF_HELPER_FLAGS_2(rddsp, 0, tl, tl, env)
 
 /* MIPS SIMD Architecture */
 
+DEF_HELPER_5(msa_addvi_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_4(msa_andi_b, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_bmnzi_b, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_bmzi_b, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_bseli_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_ceqi_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_clei_s_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_clei_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_clti_s_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_clti_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_4(msa_ldi_df, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_maxi_s_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_maxi_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_mini_s_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_mini_u_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_4(msa_nori_b, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ori_b, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_shf_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_subvi_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_4(msa_xori_b, void, env, i32, i32, i32)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index 2355809..00b6e77 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -195,6 +195,38 @@ static inline void msa_store_wr_elem(CPUMIPSState *env, uint64_t val,
     }
 }
 
+void helper_msa_addvi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = (int64_t) ts + u5;
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+void helper_msa_subvi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = (int64_t) ts - u5;
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
 void helper_msa_andi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
@@ -295,6 +327,120 @@ void helper_msa_bseli_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
     }
 }
 
+static inline int64_t msa_ceq_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    return arg1 == arg2 ? -1 : 0;
+}
+
+void helper_msa_ceqi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t i5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_ceq_df(env, df, ts, i5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_cle_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return arg1 <= arg2 ? -1 : 0;
+}
+
+void helper_msa_clei_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_cle_s_df(env, df, ts, s5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_cle_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    return u_arg1 <= u_arg2 ? -1 : 0;
+}
+
+void helper_msa_clei_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_cle_u_df(env, df, ts, u5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_clt_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return arg1 < arg2 ? -1 : 0;
+}
+
+void helper_msa_clti_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_clt_s_df(env, df, ts, s5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_clt_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    return u_arg1 < u_arg2 ? -1 : 0;
+}
+
+void helper_msa_clti_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    uint64_t td, ts; /* unsigned, like the clei_u/maxi_u/mini_u helpers */
+    int i;
+    int df_bits = 8 * (1 << df); /* element width in bits */
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_clt_u_df(env, df, ts, u5); /* all ones if ts < u5, else 0 */
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
 #define SHF_POS(i, imm) ((i & 0xfc) + ((imm >> (2 * (i & 0x03))) & 0x03))
 
 static inline void msa_shf_df(CPUMIPSState *env, uint32_t df, void *pwd,
@@ -334,3 +480,130 @@ void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         env->active_msa.msamodify |= (1 << wd);
     }
 }
+
+static inline int64_t msa_max_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return arg1 > arg2 ? arg1 : arg2;
+}
+
+void helper_msa_maxi_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_max_s_df(env, df, ts, s5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_max_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    return u_arg1 > u_arg2 ? arg1 : arg2;
+}
+
+void helper_msa_maxi_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_max_u_df(env, df, ts, u5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_min_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return arg1 < arg2 ? arg1 : arg2;
+}
+
+void helper_msa_mini_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_min_s_df(env, df, ts, s5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_min_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    return u_arg1 < u_arg2 ? arg1 : arg2;
+}
+
+void helper_msa_mini_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_min_u_df(env, df, ts, u5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t s10)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    int64_t s64 = ((int64_t)s10 << 54) >> 54; /* sign-extend low 10 bits */
+    switch (df) {
+    case DF_BYTE:
+        ALL_B_ELEMENTS(i, MSA_WRLEN) {
+            B(pwd, i)   = (int8_t)s64; /* same low byte as s10 */
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            H(pwd, i)   = (int16_t)s64;
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            W(pwd, i)   = (int32_t)s64;
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            D(pwd, i)   = s64;
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
diff --git a/target-mips/translate.c b/target-mips/translate.c
index c241299..7fde3bb 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -14847,6 +14847,93 @@ static void gen_msa_i8(CPUMIPSState *env, DisasContext *ctx)
     tcg_temp_free_i32(ti8);
 }
 
+static void gen_msa_i5(CPUMIPSState *env, DisasContext *ctx)
+{
+#define MASK_MSA_I5(op)    (MASK_MSA_MINOR(op) | ((op) & (0x7 << 23)))
+    uint32_t opcode = ctx->opcode;
+
+    uint8_t df = (ctx->opcode >> 21) & 0x3 /* df [22:21] */;
+    int64_t s5 = (ctx->opcode >> 16) & 0x1f /* s5 [20:16] */;
+    s5 = (s5 << 59) >> 59; /* sign extend s5 to 64 bits*/
+    uint8_t u5 = (ctx->opcode >> 16) & 0x1f /* u5 [20:16] */;
+    uint8_t ws = (ctx->opcode >> 11) & 0x1f /* ws [15:11] */;
+    uint8_t wd = (ctx->opcode >> 6) & 0x1f /* wd [10:6] */;
+
+    TCGv_i32 tdf = tcg_const_i32(df);
+    TCGv_i32 twd = tcg_const_i32(wd);
+    TCGv_i32 tws = tcg_const_i32(ws);
+    TCGv_i64 tu5 = tcg_const_i64(u5);
+    TCGv_i64 ts5 = tcg_const_i64(s5);
+
+    switch (MASK_MSA_I5(opcode)) {
+    case OPC_MSA_ADDVI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_addvi_df(cpu_env, tdf, twd, tws, tu5);
+        break;
+    case OPC_MSA_SUBVI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_subvi_df(cpu_env, tdf, twd, tws, tu5);
+        break;
+    case OPC_MSA_MAXI_S_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_maxi_s_df(cpu_env, tdf, twd, tws, ts5);
+        break;
+    case OPC_MSA_MAXI_U_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_maxi_u_df(cpu_env, tdf, twd, tws, tu5);
+        break;
+    case OPC_MSA_MINI_S_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_mini_s_df(cpu_env, tdf, twd, tws, ts5);
+        break;
+    case OPC_MSA_MINI_U_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_mini_u_df(cpu_env, tdf, twd, tws, tu5);
+        break;
+    case OPC_MSA_CEQI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_ceqi_df(cpu_env, tdf, twd, tws, ts5);
+        break;
+    case OPC_MSA_CLTI_S_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_clti_s_df(cpu_env, tdf, twd, tws, ts5);
+        break;
+    case OPC_MSA_CLTI_U_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_clti_u_df(cpu_env, tdf, twd, tws, tu5);
+        break;
+    case OPC_MSA_CLEI_S_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_clei_s_df(cpu_env, tdf, twd, tws, ts5);
+        break;
+    case OPC_MSA_CLEI_U_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_clei_u_df(cpu_env, tdf, twd, tws, tu5);
+        break;
+    case OPC_MSA_LDI_df:
+        {
+            int64_t s10 = (ctx->opcode >> 11) & 0x3ff /* s10 [20:11] */;
+            s10 = (s10 << 54) >> 54; /* sign extend s10 to 64 bits*/
+
+            TCGv_i32 ts10 = tcg_const_i32(s10);
+            check_msa_access(env, ctx, -1, -1, wd); /* no ws: bits are s10 */
+            gen_helper_msa_ldi_df(cpu_env, tdf, twd, ts10);
+            tcg_temp_free_i32(ts10);
+        }
+        break;
+    default:
+        MIPS_INVAL("MSA instruction");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+
+    tcg_temp_free_i32(tdf);
+    tcg_temp_free_i32(twd);
+    tcg_temp_free_i32(tws);
+    tcg_temp_free_i64(tu5);
+    tcg_temp_free_i64(ts5);
+}
+
 static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
 {
     uint32_t opcode = ctx->opcode;
@@ -14858,6 +14945,10 @@ static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
     case OPC_MSA_I8_02:
         gen_msa_i8(env, ctx);
         break;
+    case OPC_MSA_I5_06:
+    case OPC_MSA_I5_07:
+        gen_msa_i5(env, ctx);
+        break;
     default:
         MIPS_INVAL("MSA instruction");
         generate_exception(ctx, EXCP_RI);
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 12/20] target-mips: add MSA BIT format instructions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (10 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 11/20] target-mips: add MSA I5 " Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 13/20] target-mips: add MSA 3R " Yongbok Kim
                   ` (7 subsequent siblings)
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA BIT format instructions

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.h     |   12 ++
 target-mips/msa_helper.c |  292 ++++++++++++++++++++++++++++++++++++++++++++++
 target-mips/translate.c  |  100 ++++++++++++++++
 3 files changed, 404 insertions(+), 0 deletions(-)

diff --git a/target-mips/helper.h b/target-mips/helper.h
index fe0cf48..f9406d6 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -694,9 +694,14 @@ DEF_HELPER_FLAGS_2(rddsp, 0, tl, tl, env)
 
 DEF_HELPER_5(msa_addvi_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_4(msa_andi_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_bclri_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_binsli_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_binsri_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_bmnzi_b, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_bmzi_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_bnegi_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_bseli_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_bseti_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_ceqi_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_clei_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_clei_u_df, void, env, i32, i32, i32, s64)
@@ -709,6 +714,13 @@ DEF_HELPER_5(msa_mini_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_mini_u_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_4(msa_nori_b, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ori_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_sat_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_sat_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_shf_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_slli_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_srai_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_srari_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_srli_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_srlri_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_subvi_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_4(msa_xori_b, void, env, i32, i32, i32)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index 00b6e77..39377d6 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -279,6 +279,140 @@ void helper_msa_xori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
     }
 }
 
+static inline int64_t msa_bclr_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    int32_t b_arg2 = BIT_POSITION(arg2, df); /* index of the bit to clear */
+
+    return UNSIGNED(arg1 & (int64_t)~(1ULL << b_arg2), df); /* ULL: bit 63 ok */
+}
+
+void helper_msa_bclri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_bclr_df(env, df, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_bneg_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    int32_t b_arg2 = BIT_POSITION(arg2, df); /* index of the bit to flip */
+    return UNSIGNED(arg1 ^ (int64_t)(1ULL << b_arg2), df); /* ULL: bit 63 ok */
+}
+
+void helper_msa_bnegi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_bneg_df(env, df, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_bset_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    int32_t b_arg2 = BIT_POSITION(arg2, df); /* index of the bit to set */
+    return UNSIGNED(arg1 | (int64_t)(1ULL << b_arg2), df); /* ULL: bit 63 ok */
+}
+
+void helper_msa_bseti_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_bset_df(env, df, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_binsl_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_dest = UNSIGNED(dest, df);
+    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
+    int32_t sh_a = DF_BITS(df) - sh_d;
+    if (sh_d == DF_BITS(df)) {
+        return u_arg1;
+    } else {
+        return UNSIGNED(UNSIGNED(u_dest << sh_d, df) >> sh_d, df) |
+               UNSIGNED(UNSIGNED(u_arg1 >> sh_a, df) << sh_a, df);
+    }
+}
+
+void helper_msa_binsli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_binsl_df(env, df, td, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_binsr_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_dest = UNSIGNED(dest, df);
+    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
+    int32_t sh_a = DF_BITS(df) - sh_d;
+    if (sh_d == DF_BITS(df)) {
+        return u_arg1;
+    } else {
+        return UNSIGNED(UNSIGNED(u_dest >> sh_d, df) << sh_d, df) |
+               UNSIGNED(UNSIGNED(u_arg1 << sh_a, df) >> sh_a, df);
+    }
+}
+
+void helper_msa_binsri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_binsr_df(env, df, td, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
 #define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
             dest = UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
 
@@ -607,3 +741,161 @@ void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         env->active_msa.msamodify |= (1 << wd);
     }
 }
+
+static inline int64_t msa_sat_u_df(CPUMIPSState *env, uint32_t df, int64_t arg,
+        uint32_t m)
+{
+    uint64_t u_arg = UNSIGNED(arg, df);
+    return  u_arg < M_MAX_UINT(m+1) ? u_arg :
+                                      M_MAX_UINT(m+1);
+}
+
+void helper_msa_sat_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_sat_u_df(env, df, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_sat_s_df(CPUMIPSState *env, uint32_t df, int64_t arg,
+        uint32_t m)
+{
+    return arg < M_MIN_INT(m+1) ? M_MIN_INT(m+1) :
+                                  arg > M_MAX_INT(m+1) ? M_MAX_INT(m+1) :
+                                                         arg;
+}
+
+void helper_msa_sat_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_sat_s_df(env, df, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+void helper_msa_slli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df); /* element width in bits */
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = (int64_t)((uint64_t)ts << m); /* unsigned: ts may be negative */
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+void helper_msa_srai_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = ts >> m;
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_srli_df(CPUMIPSState *env, uint32_t df, int64_t arg,
+        uint32_t m)
+{
+    uint64_t u_arg = UNSIGNED(arg, df);
+    return u_arg >> m;
+}
+
+void helper_msa_srli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_srli_df(env, df, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_srari_df(CPUMIPSState *env, uint32_t df, int64_t arg,
+        uint32_t m)
+{
+    if (m == 0) {
+        return arg;
+    } else {
+        int64_t r_bit = (arg >> (m - 1)) & 1;
+        return (arg >> m) + r_bit;
+    }
+}
+
+void helper_msa_srari_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_srari_df(env, df, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline int64_t msa_srlri_df(CPUMIPSState *env, uint32_t df, int64_t arg,
+        uint32_t m)
+{
+    uint64_t u_arg = UNSIGNED(arg, df);
+    if (m == 0) {
+        return u_arg;
+    } else {
+        uint64_t r_bit = (u_arg >> (m - 1)) & 1;
+        return (u_arg >> m) + r_bit;
+    }
+}
+
+void helper_msa_srlri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_srlri_df(env, df, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 7fde3bb..f97d3a9 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -14934,6 +14934,102 @@ static void gen_msa_i5(CPUMIPSState *env, DisasContext *ctx)
     tcg_temp_free_i64(ts5);
 }
 
+static void gen_msa_bit(CPUMIPSState *env, DisasContext *ctx)
+{
+#define MASK_MSA_BIT(op)    (MASK_MSA_MINOR(op) | (op & (0x7 << 23)))
+    uint32_t opcode = ctx->opcode;
+
+    uint8_t dfm = (ctx->opcode >> 16) & 0x7f /* dfm [22:16] */;
+    uint32_t df = 0, m = 0;
+
+    if ((dfm & 0x40) == 0x00) {         /* double data format */
+        m = dfm & 0x3f;
+        df = 3;
+    } else if ((dfm & 0x60) == 0x40) {  /* word data format */
+        m = dfm & 0x1f;
+        df = 2;
+    } else if ((dfm & 0x70) == 0x60) {  /* half data format */
+        m = dfm & 0x0f;
+        df = 1;
+    } else if ((dfm & 0x78) == 0x70) {  /* byte data format */
+        m = dfm & 0x7;
+        df = 0;
+    } else {
+        if (check_msa_access(env, ctx, -1, -1, -1)) {
+            generate_exception(ctx, EXCP_RI);
+        }
+        return;
+    }
+
+    uint8_t ws = (ctx->opcode >> 11) & 0x1f /* ws [15:11] */;
+    uint8_t wd = (ctx->opcode >> 6) & 0x1f /* wd [10:6] */;
+
+    TCGv_i32 tdf = tcg_const_i32(df);
+    TCGv_i32 tm  = tcg_const_i32(m);
+    TCGv_i32 twd = tcg_const_i32(wd);
+    TCGv_i32 tws = tcg_const_i32(ws);
+
+    switch (MASK_MSA_BIT(opcode)) {
+    case OPC_MSA_SLLI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_slli_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    case OPC_MSA_SRAI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_srai_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    case OPC_MSA_SRLI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_srli_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    case OPC_MSA_BCLRI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_bclri_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    case OPC_MSA_BSETI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_bseti_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    case OPC_MSA_BNEGI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_bnegi_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    case OPC_MSA_BINSLI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_binsli_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    case OPC_MSA_BINSRI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_binsri_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    case OPC_MSA_SAT_S_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_sat_s_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    case OPC_MSA_SAT_U_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_sat_u_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    case OPC_MSA_SRARI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_srari_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    case OPC_MSA_SRLRI_df:
+        check_msa_access(env, ctx, -1, ws, wd);
+        gen_helper_msa_srlri_df(cpu_env, tdf, twd, tws, tm);
+        break;
+    default:
+        MIPS_INVAL("MSA instruction");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+
+    tcg_temp_free_i32(tdf);
+    tcg_temp_free_i32(tm);
+    tcg_temp_free_i32(twd);
+    tcg_temp_free_i32(tws);
+}
+
 static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
 {
     uint32_t opcode = ctx->opcode;
@@ -14949,6 +15045,10 @@ static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
     case OPC_MSA_I5_07:
         gen_msa_i5(env, ctx);
         break;
+    case OPC_MSA_BIT_09:
+    case OPC_MSA_BIT_0A:
+        gen_msa_bit(env, ctx);
+        break;
     default:
         MIPS_INVAL("MSA instruction");
         generate_exception(ctx, EXCP_RI);
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 13/20] target-mips: add MSA 3R format instructions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (11 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 12/20] target-mips: add MSA BIT " Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 14/20] target-mips: add MSA ELM " Yongbok Kim
                   ` (6 subsequent siblings)
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA 3R format instructions

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.h     |   63 ++
 target-mips/msa_helper.c | 2193 +++++++++++++++++++++++++++++++++++++++++-----
 target-mips/translate.c  |  300 +++++++
 3 files changed, 2329 insertions(+), 227 deletions(-)

diff --git a/target-mips/helper.h b/target-mips/helper.h
index f9406d6..00705c4 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -692,35 +692,98 @@ DEF_HELPER_FLAGS_2(rddsp, 0, tl, tl, env)
 
 /* MIPS SIMD Architecture */
 
+DEF_HELPER_5(msa_add_a_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_adds_a_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_adds_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_adds_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_addv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_addvi_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_4(msa_andi_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_asub_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_asub_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ave_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ave_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_aver_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_aver_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_bclr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_bclri_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_binsl_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_binsli_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_binsr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_binsri_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_bmnzi_b, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_bmzi_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_bneg_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_bnegi_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_bseli_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_bset_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_bseti_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ceq_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_ceqi_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_cle_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_cle_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_clei_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_clei_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_clt_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_clt_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_clti_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_clti_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_div_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_div_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dotp_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dotp_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dpadd_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dpadd_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dpsub_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_dpsub_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_hadd_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_hadd_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_hsub_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_hsub_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ilvev_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ilvl_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ilvod_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ilvr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_ldi_df, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_maddv_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_max_a_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_max_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_max_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_maxi_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_maxi_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_min_a_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_min_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_min_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_mini_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_mini_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_mod_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_mod_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_msubv_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_mulv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_nori_b, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ori_b, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_pckev_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_pckod_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_sat_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_sat_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_shf_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_sld_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_sll_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_slli_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_splat_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_sra_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srai_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_srar_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srari_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_srl_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srli_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_srlr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srlri_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_subs_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_subs_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_subsus_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_subsuu_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_subv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_subvi_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_vshf_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_xori_b, void, env, i32, i32, i32)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index 39377d6..bb4ea65 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -195,6 +195,48 @@ static inline void msa_store_wr_elem(CPUMIPSState *env, uint64_t val,
     }
 }
 
+static inline int64_t msa_add_a_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    /* |arg1| + |arg2|; negate as unsigned so the minimum value is not UB. */
+    uint64_t abs_arg1 = arg1 >= 0 ? (uint64_t)arg1 : -(uint64_t)arg1;
+    return abs_arg1 + (arg2 >= 0 ? (uint64_t)arg2 : -(uint64_t)arg2);
+}
+
+void helper_msa_add_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* wd[i] = msa_add_a_df(ws[i], wt[i]) for every element of format df. */
+    int lanes = MSA_WRLEN / (8 * (1 << df));
+    int i;
+    for (i = 0; i < lanes; i++) {
+        int64_t ts = msa_load_wr_elem_s64(env, ws, df, i);
+        int64_t tt = msa_load_wr_elem_s64(env, wt, df, i);
+        msa_store_wr_elem(env, msa_add_a_df(env, df, ts, tt), wd, df, i);
+    }
+    /* Flag wd in msamodify; 1U because an int shift overflows at wd == 31. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+void helper_msa_addv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* wd[i] = ws[i] + wt[i]; unsigned math lets 64-bit lanes wrap without UB. */
+    int lanes = MSA_WRLEN / (8 * (1 << df));
+    int i;
+    for (i = 0; i < lanes; i++) {
+        uint64_t ts = msa_load_wr_elem_s64(env, ws, df, i);
+        uint64_t tt = msa_load_wr_elem_s64(env, wt, df, i);
+        msa_store_wr_elem(env, ts + tt, wd, df, i);
+    }
+    /* Flag wd in msamodify; 1U because an int shift overflows at wd == 31. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 void helper_msa_addvi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, int64_t u5)
 {
@@ -211,6 +253,23 @@ void helper_msa_addvi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+void helper_msa_subv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* wd[i] = ws[i] - wt[i]; unsigned math lets 64-bit lanes wrap without UB. */
+    int lanes = MSA_WRLEN / (8 * (1 << df));
+    int i;
+    for (i = 0; i < lanes; i++) {
+        uint64_t ts = msa_load_wr_elem_s64(env, ws, df, i);
+        uint64_t tt = msa_load_wr_elem_s64(env, wt, df, i);
+        msa_store_wr_elem(env, ts - tt, wd, df, i);
+    }
+    /* Flag wd in msamodify; 1U because an int shift overflows at wd == 31. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 void helper_msa_subvi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, int64_t u5)
 {
@@ -227,75 +286,84 @@ void helper_msa_subvi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-void helper_msa_andi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
-        uint32_t i8)
+static inline int64_t msa_adds_a_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    void *pws = &(env->active_fpu.fpr[ws]);
-    ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        B(pwd, i) = B(pws, i) & i8;
-    } DONE_ALL_ELEMENTS;
-    if (env->active_msa.msair & MSAIR_WRP_BIT) {
-        env->active_msa.msamodify |= (1 << wd);
+    uint64_t max_int = (uint64_t)DF_MAX_INT(df);
+    uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -arg1;
+    uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -arg2;
+    if (abs_arg1 > max_int || abs_arg2 > max_int) {
+        return (int64_t)max_int;
+    } else {
+        return (abs_arg1 < max_int - abs_arg2) ? abs_arg1 + abs_arg2 : max_int;
     }
 }
 
-void helper_msa_ori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
-        uint32_t i8)
+void helper_msa_adds_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    void *pws = &(env->active_fpu.fpr[ws]);
-    ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        B(pwd, i) = B(pws, i) | i8;
-    } DONE_ALL_ELEMENTS;
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_adds_a_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-void helper_msa_nori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
-        uint32_t i8)
+static inline int64_t msa_adds_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    void *pws = &(env->active_fpu.fpr[ws]);
-    ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        B(pwd, i) = ~(B(pws, i) | i8);
-    } DONE_ALL_ELEMENTS;
-    if (env->active_msa.msair & MSAIR_WRP_BIT) {
-        env->active_msa.msamodify |= (1 << wd);
+    int64_t max_int = DF_MAX_INT(df);
+    int64_t min_int = DF_MIN_INT(df);
+    if (arg1 < 0) {
+        return (min_int - arg1 < arg2) ? arg1 + arg2 : min_int;
+    } else {
+        return (arg2 < max_int - arg1) ? arg1 + arg2 : max_int;
     }
 }
 
-void helper_msa_xori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
-        uint32_t i8)
+void helper_msa_adds_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    void *pws = &(env->active_fpu.fpr[ws]);
-    ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        B(pwd, i) = B(pws, i) ^ i8;
-    } DONE_ALL_ELEMENTS;
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_adds_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-static inline int64_t msa_bclr_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
-        int64_t arg2)
+static inline uint64_t msa_adds_u_df(CPUMIPSState *env, uint32_t df,
+        uint64_t arg1, uint64_t arg2)
 {
-    int32_t b_arg2 = BIT_POSITION(arg2, df);
-
-    return UNSIGNED(arg1 & (~(1LL << b_arg2)), df);
+    uint64_t max_uint = DF_MAX_UINT(df);
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    return (u_arg1 < max_uint - u_arg2) ? u_arg1 + u_arg2 : max_uint;
 }
 
-void helper_msa_bclri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t m)
+void helper_msa_adds_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
-        ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_bclr_df(env, df, ts, m);
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_adds_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -303,22 +371,28 @@ void helper_msa_bclri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_bneg_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
-        int64_t arg2)
+static inline int64_t msa_subs_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
-    int32_t b_arg2 = BIT_POSITION(arg2, df);
-    return UNSIGNED(arg1 ^ (1LL << b_arg2), df);
+    int64_t max_int = DF_MAX_INT(df);
+    int64_t min_int = DF_MIN_INT(df);
+    if (arg2 > 0) {
+        return (min_int + arg2 < arg1) ? arg1 - arg2 : min_int;
+    } else {
+        return (arg1 < max_int + arg2) ? arg1 - arg2 : max_int;
+    }
 }
 
-void helper_msa_bnegi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t m)
+void helper_msa_subs_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_bneg_df(env, df, ts, m);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_subs_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -326,22 +400,24 @@ void helper_msa_bnegi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_bset_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
-        int64_t arg2)
+static inline int64_t msa_subs_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
-    int32_t b_arg2 = BIT_POSITION(arg2, df);
-    return UNSIGNED(arg1 | (1LL << b_arg2), df);
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    return (u_arg1 > u_arg2) ? u_arg1 - u_arg2 : 0;
 }
 
-void helper_msa_bseti_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t m)
+void helper_msa_subs_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
-        ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_bset_df(env, df, ts, m);
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_subs_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -349,31 +425,34 @@ void helper_msa_bseti_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_binsl_df(CPUMIPSState *env, uint32_t df,
-        int64_t dest, int64_t arg1, int64_t arg2)
+static inline int64_t msa_subsuu_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
-    uint64_t u_dest = UNSIGNED(dest, df);
-    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
-    int32_t sh_a = DF_BITS(df) - sh_d;
-    if (sh_d == DF_BITS(df)) {
-        return u_arg1;
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    int64_t max_int = DF_MAX_INT(df);
+    int64_t min_int = DF_MIN_INT(df);
+    if (u_arg1 > u_arg2) {
+        return u_arg1 - u_arg2 < (uint64_t)max_int ?
+            (int64_t)(u_arg1 - u_arg2) :
+            max_int;
     } else {
-        return UNSIGNED(UNSIGNED(u_dest << sh_d, df) >> sh_d, df) |
-               UNSIGNED(UNSIGNED(u_arg1 >> sh_a, df) << sh_a, df);
+        return u_arg2 - u_arg1 < (uint64_t)(-min_int) ?
+            (int64_t)(u_arg1 - u_arg2) :
+            min_int;
     }
 }
 
-void helper_msa_binsli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t m)
+void helper_msa_subsuu_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_load_wr_elem_s64(env, wd, df, i);
-        td = msa_binsl_df(env, df, td, ts, m);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_subsuu_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -381,31 +460,34 @@ void helper_msa_binsli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_binsr_df(CPUMIPSState *env, uint32_t df,
-        int64_t dest, int64_t arg1, int64_t arg2)
+static inline int64_t msa_subsus_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
-    uint64_t u_dest = UNSIGNED(dest, df);
-    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
-    int32_t sh_a = DF_BITS(df) - sh_d;
-    if (sh_d == DF_BITS(df)) {
-        return u_arg1;
+    uint64_t max_uint = DF_MAX_UINT(df);
+    if (arg2 >= 0) {
+        uint64_t u_arg2 = (uint64_t)arg2;
+        return (u_arg1 > u_arg2) ?
+            (int64_t)(u_arg1 - u_arg2) :
+            0;
     } else {
-        return UNSIGNED(UNSIGNED(u_dest >> sh_d, df) << sh_d, df) |
-               UNSIGNED(UNSIGNED(u_arg1 << sh_a, df) >> sh_a, df);
+        uint64_t u_arg2 = (uint64_t)(-arg2);
+        return (u_arg1 < max_uint - u_arg2) ?
+            (int64_t)(u_arg1 + u_arg2) :
+            (int64_t)max_uint;
     }
 }
 
-void helper_msa_binsri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t m)
+void helper_msa_subsus_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_load_wr_elem_s64(env, wd, df, i);
-        td = msa_binsr_df(env, df, td, ts, m);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_subsus_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -413,69 +495,76 @@ void helper_msa_binsri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-#define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
-            dest = UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
-
-void helper_msa_bmnzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+void helper_msa_andi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
     void *pwd = &(env->active_fpu.fpr[wd]);
     void *pws = &(env->active_fpu.fpr[ws]);
     ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        BIT_MOVE_IF_NOT_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
+        B(pwd, i) = B(pws, i) & i8;
     } DONE_ALL_ELEMENTS;
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-#define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
-            dest = UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
-
-void helper_msa_bmzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+void helper_msa_ori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
     void *pwd = &(env->active_fpu.fpr[wd]);
     void *pws = &(env->active_fpu.fpr[ws]);
     ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        BIT_MOVE_IF_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
+        B(pwd, i) = B(pws, i) | i8;
     } DONE_ALL_ELEMENTS;
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-#define BIT_SELECT(dest, arg1, arg2, df) \
-            dest = UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
+void helper_msa_nori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{   /* Byte-wise NOR of register ws with the immediate i8, written to wd. */
+    void *pwd = &(env->active_fpu.fpr[wd]); /* destination register storage */
+    void *pws = &(env->active_fpu.fpr[ws]); /* source register storage */
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        B(pwd, i) = ~(B(pws, i) | i8); /* OR with i8, then invert */
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* only when WRP bit is set */
+        env->active_msa.msamodify |= (1 << wd); /* flag wd in msamodify */
+    }
+}
 
-void helper_msa_bseli_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+void helper_msa_xori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
     void *pwd = &(env->active_fpu.fpr[wd]);
     void *pws = &(env->active_fpu.fpr[ws]);
     ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        BIT_SELECT(B(pwd, i), B(pws, i), i8, DF_BYTE);
+        B(pwd, i) = B(pws, i) ^ i8;
     } DONE_ALL_ELEMENTS;
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-static inline int64_t msa_ceq_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
-        int64_t arg2)
+static inline int64_t msa_asub_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
 {
-    return arg1 == arg2 ? -1 : 0;
+    /* signed compare */
+    return (arg1 < arg2) ?
+        (uint64_t)(arg2 - arg1) : (uint64_t)(arg1 - arg2);
 }
 
-void helper_msa_ceqi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t i5)
+void helper_msa_asub_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_ceq_df(env, df, ts, i5);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_asub_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -483,21 +572,50 @@ void helper_msa_ceqi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_cle_s_df(CPUMIPSState *env, uint32_t df,
+static inline uint64_t msa_asub_u_df(CPUMIPSState *env, uint32_t df,
+        uint64_t arg1, uint64_t arg2)
+{
+    /* Absolute difference of the operands after UNSIGNED() conversion. */
+    uint64_t a = UNSIGNED(arg1, df);
+    uint64_t b = UNSIGNED(arg2, df);
+    /* Subtract the smaller from the larger so the result never wraps. */
+    return (a >= b) ? a - b : b - a;
+}
+
+void helper_msa_asub_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* wd[i] = msa_asub_u_df(ws[i], wt[i]) for every element of format df. */
+    int lanes = MSA_WRLEN / (8 * (1 << df));
+    int i;
+    for (i = 0; i < lanes; i++) {
+        uint64_t ts = msa_load_wr_elem_i64(env, ws, df, i);
+        uint64_t tt = msa_load_wr_elem_i64(env, wt, df, i);
+        msa_store_wr_elem(env, msa_asub_u_df(env, df, ts, tt), wd, df, i);
+    }
+    /* Flag wd in msamodify; 1U because an int shift overflows at wd == 31. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+static inline int64_t msa_ave_s_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
-    return arg1 <= arg2 ? -1 : 0;
+    /* signed shift */
+    return (arg1 >> 1) + (arg2 >> 1) + (arg1 & arg2 & 1);
 }
 
-void helper_msa_clei_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t s5)
+void helper_msa_ave_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_cle_s_df(env, df, ts, s5);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_ave_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -505,23 +623,25 @@ void helper_msa_clei_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_cle_u_df(CPUMIPSState *env, uint32_t df,
-        int64_t arg1, int64_t arg2)
+static inline uint64_t msa_ave_u_df(CPUMIPSState *env, uint32_t df,
+        uint64_t arg1, uint64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
     uint64_t u_arg2 = UNSIGNED(arg2, df);
-    return u_arg1 <= u_arg2 ? -1 : 0;
+    /* unsigned shift */
+    return (u_arg1 >> 1) + (u_arg2 >> 1) + (u_arg1 & u_arg2 & 1);
 }
 
-void helper_msa_clei_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t u5)
+void helper_msa_ave_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    uint64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_i64(env, ws, df, i);
-        td = msa_cle_u_df(env, df, ts, u5);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_ave_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -529,21 +649,49 @@ void helper_msa_clei_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_clt_s_df(CPUMIPSState *env, uint32_t df,
+static inline int64_t msa_aver_s_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
-    return arg1 < arg2 ? -1 : 0;
+    /* signed shift */
+    return (arg1 >> 1) + (arg2 >> 1) + ((arg1 | arg2) & 1);
 }
 
-void helper_msa_clti_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t s5)
+void helper_msa_aver_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_clt_s_df(env, df, ts, s5);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_aver_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+static inline uint64_t msa_aver_u_df(CPUMIPSState *env, uint32_t df,
+        uint64_t arg1, uint64_t arg2)
+{
+    uint64_t a = UNSIGNED(arg1, df);
+    uint64_t b = UNSIGNED(arg2, df);
+    /* (a + b + 1) / 2 without forming the sum, which could overflow. */
+    return ((a | b) & 1) + (a >> 1) + (b >> 1);
+}
+
+void helper_msa_aver_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_aver_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -551,85 +699,1520 @@ void helper_msa_clti_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_clt_u_df(CPUMIPSState *env, uint32_t df,
-        int64_t arg1, int64_t arg2)
-{
-    uint64_t u_arg1 = UNSIGNED(arg1, df);
-    uint64_t u_arg2 = UNSIGNED(arg2, df);
-    return u_arg1 < u_arg2 ? -1 : 0;
-}
-
-void helper_msa_clti_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t u5)
+/*
+ * Return arg1 with a single bit cleared, passed through UNSIGNED() for the
+ * data format df.  The bit index is BIT_POSITION(arg2, df).
+ */
+static inline int64_t msa_bclr_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    int32_t b_arg2 = BIT_POSITION(arg2, df);
+    /* 1ULL: shifting a signed 1LL into bit 63 would be undefined */
+    return UNSIGNED(arg1 & (~(1ULL << b_arg2)), df);
+}
+
+/*
+ * Vector form: for every df-sized element i, wd[i] = bclr(ws[i], wt[i]).
+ * Afterwards wd is flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_bclr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_bclr_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Immediate form: the same bit index m is applied to every element of ws;
+ * the result is stored to wd and wd is flagged like in the vector form.
+ */
+void helper_msa_bclri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_bclr_df(env, df, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Return arg1 with a single bit inverted, passed through UNSIGNED() for the
+ * data format df.  The bit index is BIT_POSITION(arg2, df).
+ */
+static inline int64_t msa_bneg_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    int32_t b_arg2 = BIT_POSITION(arg2, df);
+    /* 1ULL: shifting a signed 1LL into bit 63 would be undefined */
+    return UNSIGNED(arg1 ^ (1ULL << b_arg2), df);
+}
+
+/*
+ * Vector form: for every df-sized element i, wd[i] = bneg(ws[i], wt[i]).
+ * Afterwards wd is flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_bneg_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_bneg_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Immediate form: the same bit index m is applied to every element of ws;
+ * the result is stored to wd and wd is flagged like in the vector form.
+ */
+void helper_msa_bnegi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_bneg_df(env, df, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Return arg1 with a single bit set, passed through UNSIGNED() for the
+ * data format df.  The bit index is BIT_POSITION(arg2, df).
+ */
+static inline int64_t msa_bset_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    int32_t b_arg2 = BIT_POSITION(arg2, df);
+    /* 1ULL: shifting a signed 1LL into bit 63 would be undefined */
+    return UNSIGNED(arg1 | (1ULL << b_arg2), df);
+}
+
+/*
+ * Vector form: for every df-sized element i, wd[i] = bset(ws[i], wt[i]).
+ * Afterwards wd is flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_bset_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_bset_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Immediate form: the same bit index m is applied to every element of ws;
+ * the result is stored to wd and wd is flagged like in the vector form.
+ */
+void helper_msa_bseti_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_bset_df(env, df, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Bit insert left.  With sh_d = BIT_POSITION(arg2, df) + 1, the result takes
+ * its sh_d most significant bits from arg1 and the remaining low bits from
+ * dest (this reading assumes UNSIGNED() truncates to the element width).
+ * When sh_d covers the whole element arg1 is returned directly, which also
+ * avoids shifting by the full element width.
+ */
+static inline int64_t msa_binsl_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_dest = UNSIGNED(dest, df);
+    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
+    int32_t sh_a = DF_BITS(df) - sh_d;
+    if (sh_d == DF_BITS(df)) {
+        return u_arg1;
+    } else {
+        /* clear the top sh_d bits of dest, keep only the top sh_d of arg1 */
+        return UNSIGNED(UNSIGNED(u_dest << sh_d, df) >> sh_d, df) |
+               UNSIGNED(UNSIGNED(u_arg1 >> sh_a, df) << sh_a, df);
+    }
+}
+
+/*
+ * Vector form: wd[i] = binsl(wd[i], ws[i], wt[i]) for every df-sized element;
+ * wd is both an input and the destination.  Afterwards wd is flagged in
+ * msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_binsl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_binsl_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Immediate form: the bit position operand is the constant m for every
+ * element; otherwise identical to the vector form.
+ */
+void helper_msa_binsli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_binsl_df(env, df, td, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Bit insert right.  With sh_d = BIT_POSITION(arg2, df) + 1, the result takes
+ * its sh_d least significant bits from arg1 and the remaining high bits from
+ * dest (this reading assumes UNSIGNED() truncates to the element width).
+ * When sh_d covers the whole element arg1 is returned directly, which also
+ * avoids shifting by the full element width.
+ */
+static inline int64_t msa_binsr_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_dest = UNSIGNED(dest, df);
+    int32_t sh_d = BIT_POSITION(arg2, df) + 1;
+    int32_t sh_a = DF_BITS(df) - sh_d;
+    if (sh_d == DF_BITS(df)) {
+        return u_arg1;
+    } else {
+        /* clear the low sh_d bits of dest, keep only the low sh_d of arg1 */
+        return UNSIGNED(UNSIGNED(u_dest >> sh_d, df) << sh_d, df) |
+               UNSIGNED(UNSIGNED(u_arg1 << sh_a, df) >> sh_a, df);
+    }
+}
+
+/*
+ * Vector form: wd[i] = binsr(wd[i], ws[i], wt[i]) for every df-sized element;
+ * wd is both an input and the destination.  Afterwards wd is flagged in
+ * msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_binsr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_binsr_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Immediate form: the bit position operand is the constant m for every
+ * element; otherwise identical to the vector form.
+ */
+void helper_msa_binsri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t m)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_binsr_df(env, df, td, ts, m);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * dest takes the bits of arg1 where the mask arg2 is 1 and keeps its own
+ * bits where the mask is 0.  Arguments are parenthesised so that compound
+ * expressions can be passed safely; dest must be an lvalue and is
+ * evaluated more than once.
+ */
+#define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
+            (dest) = UNSIGNED((((dest) & (~(arg2))) | ((arg1) & (arg2))), df)
+
+/*
+ * Apply BIT_MOVE_IF_NOT_ZERO to every byte element of wd, with the matching
+ * byte of ws as source and the immediate i8 as mask.  Afterwards wd is
+ * flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_bmnzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        BIT_MOVE_IF_NOT_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * dest takes the bits of arg1 where the mask arg2 is 0 and keeps its own
+ * bits where the mask is 1.  Arguments are parenthesised so that compound
+ * expressions can be passed safely; dest must be an lvalue and is
+ * evaluated more than once.
+ */
+#define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
+            (dest) = UNSIGNED(((dest) & (arg2)) | ((arg1) & (~(arg2))), df)
+
+/*
+ * Apply BIT_MOVE_IF_ZERO to every byte element of wd, with the matching
+ * byte of ws as source and the immediate i8 as mask.  Afterwards wd is
+ * flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_bmzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        BIT_MOVE_IF_ZERO(B(pwd, i), B(pws, i), i8, DF_BYTE);
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * dest acts as the selector: where a bit of dest is 0 the result bit comes
+ * from arg1, where it is 1 the result bit comes from arg2.  Arguments are
+ * parenthesised so that compound expressions can be passed safely; dest
+ * must be an lvalue and is evaluated more than once.
+ */
+#define BIT_SELECT(dest, arg1, arg2, df) \
+            (dest) = UNSIGNED(((arg1) & (~(dest))) | ((arg2) & (dest)), df)
+
+/*
+ * Apply BIT_SELECT to every byte element of wd, choosing between the
+ * matching byte of ws and the immediate i8.  Afterwards wd is flagged in
+ * msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_bseli_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t i8)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        BIT_SELECT(B(pwd, i), B(pws, i), i8, DF_BYTE);
+    } DONE_ALL_ELEMENTS;
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/* Compare equal: all ones (-1) when arg1 == arg2, otherwise 0. */
+static inline int64_t msa_ceq_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    return arg1 == arg2 ? -1 : 0;
+}
+
+/*
+ * Vector form: wd[i] = ceq(ws[i], wt[i]) for every df-sized element, using
+ * sign-extended loads.  Afterwards wd is flagged in msamodify when
+ * MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_ceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_ceq_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/* Immediate form: every element of ws is compared with the constant i5. */
+void helper_msa_ceqi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t i5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_ceq_df(env, df, ts, i5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/* Signed compare: all ones (-1) when arg1 <= arg2, otherwise 0. */
+static inline int64_t msa_cle_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return arg1 <= arg2 ? -1 : 0;
+}
+
+/*
+ * Vector form: wd[i] = cle_s(ws[i], wt[i]) for every df-sized element, using
+ * sign-extended loads.  Afterwards wd is flagged in msamodify when
+ * MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_cle_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_cle_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/* Immediate form: every element of ws is compared with the constant s5. */
+void helper_msa_clei_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_cle_s_df(env, df, ts, s5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Unsigned compare: both operands go through UNSIGNED() for the data format
+ * df first; the result is all ones (-1) when arg1 <= arg2, otherwise 0.
+ */
+static inline int64_t msa_cle_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    return u_arg1 <= u_arg2 ? -1 : 0;
+}
+
+/*
+ * Vector form: wd[i] = cle_u(ws[i], wt[i]) for every df-sized element.
+ * Afterwards wd is flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_cle_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_cle_u_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/* Immediate form: every element of ws is compared with the constant u5. */
+void helper_msa_clei_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_cle_u_df(env, df, ts, u5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/* Signed compare: all ones (-1) when arg1 < arg2, otherwise 0. */
+static inline int64_t msa_clt_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return arg1 < arg2 ? -1 : 0;
+}
+
+/*
+ * Vector form: wd[i] = clt_s(ws[i], wt[i]) for every df-sized element, using
+ * sign-extended loads.  Afterwards wd is flagged in msamodify when
+ * MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_clt_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_clt_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/* Immediate form: every element of ws is compared with the constant s5. */
+void helper_msa_clti_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    int64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_clt_s_df(env, df, ts, s5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Unsigned compare: both operands go through UNSIGNED() for the data format
+ * df first; the result is all ones (-1) when arg1 < arg2, otherwise 0.
+ */
+static inline int64_t msa_clt_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    uint64_t u_arg1 = UNSIGNED(arg1, df);
+    uint64_t u_arg2 = UNSIGNED(arg2, df);
+    return u_arg1 < u_arg2 ? -1 : 0;
+}
+
+/*
+ * Vector form: wd[i] = clt_u(ws[i], wt[i]) for every df-sized element.
+ * Afterwards wd is flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_clt_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_clt_u_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Immediate form: every element of ws is compared with the constant u5.
+ * Uses unsigned temporaries and the _i64 loader like the other unsigned
+ * compare helpers; msa_clt_u_df() passes its operands through UNSIGNED()
+ * itself, so the comparison result is expected to be unchanged
+ * (NOTE(review): confirm against the loader definitions).
+ */
+void helper_msa_clti_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_clt_u_df(env, df, ts, u5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Half-element extraction.  With w = DF_BITS(df), the EVEN macros yield the
+ * low w/2 bits of a and the ODD macros yield bits [w/2, w), sign- or
+ * zero-extended to 64 bits.  All left shifts are performed on uint64_t
+ * because left-shifting a negative signed value is undefined behaviour;
+ * the signed variants cast back before the arithmetic right shift.
+ */
+#define SIGNED_EVEN(a, df) \
+        (((int64_t)(((uint64_t)(a)) << (64 - DF_BITS(df) / 2))) >> \
+         (64 - DF_BITS(df) / 2))
+#define UNSIGNED_EVEN(a, df) \
+        ((((uint64_t)(a)) << (64 - DF_BITS(df) / 2)) >> (64 - DF_BITS(df) / 2))
+
+#define SIGNED_ODD(a, df) \
+        (((int64_t)(((uint64_t)(a)) << (64 - DF_BITS(df)))) >> \
+         (64 - DF_BITS(df) / 2))
+#define UNSIGNED_ODD(a, df) \
+        ((((uint64_t)(a)) << (64 - DF_BITS(df))) >> (64 - DF_BITS(df) / 2))
+
+/* Declare locals e and o holding the sign-extended even/odd halves of a. */
+#define SIGNED_EXTRACT(e, o, a, df)             \
+    int64_t e = SIGNED_EVEN(a, df);             \
+    int64_t o = SIGNED_ODD(a, df);
+
+/*
+ * Declare locals e and o holding the zero-extended even/odd halves of a.
+ * They are uint64_t so that a product of two halves wraps modulo 2^64
+ * instead of overflowing a signed type.
+ */
+#define UNSIGNED_EXTRACT(e, o, a, df)           \
+    uint64_t e = UNSIGNED_EVEN(a, df);          \
+    uint64_t o = UNSIGNED_ODD(a, df);
+
+/* Signed horizontal add: odd half of arg1 plus even half of arg2. */
+static inline int64_t msa_hadd_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return SIGNED_ODD(arg1, df) + SIGNED_EVEN(arg2, df);
+}
+
+/*
+ * Vector form: wd[i] = hadd_s(ws[i], wt[i]) for every df-sized element.
+ * Afterwards wd is flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_hadd_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_hadd_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/* Unsigned horizontal add: odd half of arg1 plus even half of arg2. */
+static inline int64_t msa_hadd_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return UNSIGNED_ODD(arg1, df) + UNSIGNED_EVEN(arg2, df);
+}
+
+/*
+ * Vector form: wd[i] = hadd_u(ws[i], wt[i]) for every df-sized element.
+ * Afterwards wd is flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_hadd_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_hadd_u_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/* Signed horizontal subtract: odd half of arg1 minus even half of arg2. */
+static inline int64_t msa_hsub_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return SIGNED_ODD(arg1, df) - SIGNED_EVEN(arg2, df);
+}
+
+/*
+ * Vector form: wd[i] = hsub_s(ws[i], wt[i]) for every df-sized element.
+ * Afterwards wd is flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_hsub_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_hsub_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Unsigned horizontal subtract: odd half of arg1 minus even half of arg2,
+ * computed in uint64_t (so it wraps) and returned as int64_t.
+ */
+static inline int64_t msa_hsub_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return UNSIGNED_ODD(arg1, df) - UNSIGNED_EVEN(arg2, df);
+}
+
+/*
+ * Vector form: wd[i] = hsub_u(ws[i], wt[i]) for every df-sized element.
+ * Afterwards wd is flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_hsub_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_hsub_u_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Signed dot product of the two half-elements of arg1 and arg2:
+ * even(arg1) * even(arg2) + odd(arg1) * odd(arg2).
+ */
+static inline int64_t msa_dotp_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
+    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
+    return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
+}
+
+/*
+ * Vector form: wd[i] = dotp_s(ws[i], wt[i]) for every df-sized element.
+ * Afterwards wd is flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_dotp_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_dotp_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Unsigned dot product of the two half-elements of arg1 and arg2:
+ * even(arg1) * even(arg2) + odd(arg1) * odd(arg2), with the halves
+ * zero-extended by UNSIGNED_EXTRACT().
+ */
+static inline int64_t msa_dotp_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
+    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
+    return (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
+}
+
+/*
+ * Vector form: wd[i] = dotp_u(ws[i], wt[i]) for every df-sized element.
+ * Afterwards wd is flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_dotp_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_dotp_u_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Signed dot product and accumulate:
+ * dest + even(arg1) * even(arg2) + odd(arg1) * odd(arg2).
+ */
+static inline int64_t msa_dpadd_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
+    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
+    return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
+}
+
+/*
+ * Vector form: wd[i] = dpadd_s(wd[i], ws[i], wt[i]) for every df-sized
+ * element; wd is both accumulator input and destination.  Afterwards wd is
+ * flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_dpadd_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_dpadd_s_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Unsigned dot product and accumulate:
+ * dest + even(arg1) * even(arg2) + odd(arg1) * odd(arg2), with the halves
+ * zero-extended by UNSIGNED_EXTRACT().
+ */
+static inline int64_t msa_dpadd_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
+    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
+    return dest + (even_arg1 * even_arg2) + (odd_arg1 * odd_arg2);
+}
+
+/*
+ * Vector form: wd[i] = dpadd_u(wd[i], ws[i], wt[i]) for every df-sized
+ * element; wd is both accumulator input and destination.  Afterwards wd is
+ * flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_dpadd_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_dpadd_u_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Signed dot product and subtract:
+ * dest - (even(arg1) * even(arg2) + odd(arg1) * odd(arg2)).
+ */
+static inline int64_t msa_dpsub_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    SIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
+    SIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
+    return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
+}
+
+/*
+ * Vector form: wd[i] = dpsub_s(wd[i], ws[i], wt[i]) for every df-sized
+ * element; wd is both accumulator input and destination.  Afterwards wd is
+ * flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_dpsub_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_dpsub_s_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Unsigned dot product and subtract:
+ * dest - (even(arg1) * even(arg2) + odd(arg1) * odd(arg2)), with the halves
+ * zero-extended by UNSIGNED_EXTRACT().
+ */
+static inline int64_t msa_dpsub_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    UNSIGNED_EXTRACT(even_arg1, odd_arg1, arg1, df);
+    UNSIGNED_EXTRACT(even_arg2, odd_arg2, arg2, df);
+    return dest - ((even_arg1 * even_arg2) + (odd_arg1 * odd_arg2));
+}
+
+/*
+ * Vector form: wd[i] = dpsub_u(wd[i], ws[i], wt[i]) for every df-sized
+ * element; wd is both accumulator input and destination.  Afterwards wd is
+ * flagged in msamodify when MSAIR_WRP_BIT is set in msair.
+ */
+void helper_msa_dpsub_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    uint64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_dpsub_u_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Interleave even: result element 2*i is element 2*i of wt and result
+ * element 2*i+1 is element 2*i of ws.  The result is assembled in a
+ * temporary so that pwd may alias pws or pwt, then copied with msa_move_v().
+ */
+static inline void msa_ilvev_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    wr_t wx, *pwx = &wx;
+    switch (df) {
+    case DF_BYTE:
+        /* byte data format */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            B(pwx, 2*i)   = B(pwt, 2*i);
+            B(pwx, 2*i+1) = B(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* half data format */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            H(pwx, 2*i)   = H(pwt, 2*i);
+            H(pwx, 2*i+1) = H(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* word data format */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            W(pwx, 2*i)   = W(pwt, 2*i);
+            W(pwx, 2*i+1) = W(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* double data format */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            D(pwx, 2*i)   = D(pwt, 2*i);
+            D(pwx, 2*i+1) = D(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    msa_move_v(pwd, pwx);
+}
+
+/*
+ * Helper entry point: resolves the register indices to fpr[] slots, runs
+ * msa_ilvev_df() and flags wd in msamodify when MSAIR_WRP_BIT is set.
+ */
+void helper_msa_ilvev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    msa_ilvev_df(env, df, pwd, pws, pwt);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Interleave odd: result element 2*i is element 2*i+1 of wt and result
+ * element 2*i+1 is element 2*i+1 of ws.  The result is assembled in a
+ * temporary so that pwd may alias pws or pwt, then copied with msa_move_v().
+ */
+static inline void msa_ilvod_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    wr_t wx, *pwx = &wx;
+    switch (df) {
+    case DF_BYTE:
+        /* byte data format */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            B(pwx, 2*i)   = B(pwt, 2*i+1);
+            B(pwx, 2*i+1) = B(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* half data format */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            H(pwx, 2*i)   = H(pwt, 2*i+1);
+            H(pwx, 2*i+1) = H(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* word data format */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            W(pwx, 2*i)   = W(pwt, 2*i+1);
+            W(pwx, 2*i+1) = W(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* double data format */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            D(pwx, 2*i)   = D(pwt, 2*i+1);
+            D(pwx, 2*i+1) = D(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    msa_move_v(pwd, pwx);
+}
+
+/*
+ * Helper entry point: resolves the register indices to fpr[] slots, runs
+ * msa_ilvod_df() and flags wd in msamodify when MSAIR_WRP_BIT is set.
+ */
+void helper_msa_ilvod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    msa_ilvod_df(env, df, pwd, pws, pwt);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Interleave left: result element 2*i is BL/HL/WL/DL(pwt, i) and result
+ * element 2*i+1 is the same accessor applied to pws (the *L accessors
+ * presumably address the left half of the register -- defined elsewhere).
+ * The result is assembled in a temporary so that pwd may alias a source,
+ * then copied with msa_move_v().
+ */
+static inline void msa_ilvl_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    wr_t wx, *pwx = &wx;
+    switch (df) {
+    case DF_BYTE:
+        /* byte data format */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            B(pwx, 2*i)   = BL(pwt, i);
+            B(pwx, 2*i+1) = BL(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* half data format */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            H(pwx, 2*i)   = HL(pwt, i);
+            H(pwx, 2*i+1) = HL(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* word data format */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            W(pwx, 2*i)   = WL(pwt, i);
+            W(pwx, 2*i+1) = WL(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* double data format */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            D(pwx, 2*i)   = DL(pwt, i);
+            D(pwx, 2*i+1) = DL(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    msa_move_v(pwd, pwx);
+}
+
+/*
+ * Helper entry point: resolves the register indices to fpr[] slots, runs
+ * msa_ilvl_df() and flags wd in msamodify when MSAIR_WRP_BIT is set.
+ */
+void helper_msa_ilvl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    msa_ilvl_df(env, df, pwd, pws, pwt);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Interleave right: result element 2*i is BR/HR/WR/DR(pwt, i) and result
+ * element 2*i+1 is the same accessor applied to pws (the *R accessors
+ * presumably address the right half of the register -- defined elsewhere).
+ * The result is assembled in a temporary so that pwd may alias a source,
+ * then copied with msa_move_v().
+ */
+static inline void msa_ilvr_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    wr_t wx, *pwx = &wx;
+    switch (df) {
+    case DF_BYTE:
+        /* byte data format */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            B(pwx, 2*i)   = BR(pwt, i);
+            B(pwx, 2*i+1) = BR(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* half data format */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            H(pwx, 2*i)   = HR(pwt, i);
+            H(pwx, 2*i+1) = HR(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* word data format */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            W(pwx, 2*i)   = WR(pwt, i);
+            W(pwx, 2*i+1) = WR(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* double data format */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            D(pwx, 2*i)   = DR(pwt, i);
+            D(pwx, 2*i+1) = DR(pws, i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    msa_move_v(pwd, pwx);
+}
+
+/*
+ * Helper entry point: resolves the register indices to fpr[] slots, runs
+ * msa_ilvr_df() and flags wd in msamodify when MSAIR_WRP_BIT is set.
+ */
+void helper_msa_ilvr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    msa_ilvr_df(env, df, pwd, pws, pwt);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Pack even: element 2*i of wt is written through the *R accessor at index
+ * i and element 2*i of ws through the *L accessor at index i (the *R / *L
+ * accessors presumably address the right / left half of the register --
+ * defined elsewhere).  The result is assembled in a temporary so that pwd
+ * may alias a source, then copied with msa_move_v().
+ */
+static inline void msa_pckev_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt)
+{
+    wr_t wx, *pwx = &wx;
+    switch (df) {
+    case DF_BYTE:
+        /* byte data format */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            BR(pwx, i) = B(pwt, 2*i);
+            BL(pwx, i) = B(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* half data format */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            HR(pwx, i) = H(pwt, 2*i);
+            HL(pwx, i) = H(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* word data format */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            WR(pwx, i) = W(pwt, 2*i);
+            WL(pwx, i) = W(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* double data format */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            DR(pwx, i) = D(pwt, 2*i);
+            DL(pwx, i) = D(pws, 2*i);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    msa_move_v(pwd, pwx);
+}
+
+/*
+ * Helper entry point: resolves the register indices to fpr[] slots, runs
+ * msa_pckev_df() and flags wd in msamodify when MSAIR_WRP_BIT is set.
+ */
+void helper_msa_pckev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    msa_pckev_df(env, df, pwd, pws, pwt);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* 1U: a signed 1 << 31 would overflow should wd be 31 */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+static inline void msa_pckod_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt) /* PCKOD.df: pack odd-indexed elements */
+{
+    wr_t wx, *pwx = &wx; /* result is assembled in a local wr_t */
+    switch (df) {
+    case DF_BYTE:
+        /* byte data format: odd bytes of wt fill BR(), of ws fill BL() */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            BR(pwx, i) = B(pwt, 2*i+1);
+            BL(pwx, i) = B(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* half data format: same pattern via H()/HR()/HL() */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            HR(pwx, i) = H(pwt, 2*i+1);
+            HL(pwx, i) = H(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* word data format: same pattern via W()/WR()/WL() */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            WR(pwx, i) = W(pwt, 2*i+1);
+            WL(pwx, i) = W(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* double data format: same pattern via D()/DR()/DL() */
+        ALL_Q_ELEMENTS(i, MSA_WRLEN) {
+            DR(pwx, i) = D(pwt, 2*i+1);
+            DL(pwx, i) = D(pws, 2*i+1);
+        } DONE_ALL_ELEMENTS;
+       break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    msa_move_v(pwd, pwx); /* presumably copies wx into *pwd; done last */
+}
+
+void helper_msa_pckod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt) /* helper entry: wd = PCKOD.df(ws, wt) */
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    msa_pckod_df(env, df, pwd, pws, pwt);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+static inline void msa_vshf_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, void *pwt) /* VSHF.df: wd supplies the selectors */
+{
+    uint32_t n = MSA_WRLEN / DF_BITS(df); /* element count, presumably */
+    uint32_t k; /* selector: below n picks from wt, otherwise from ws */
+    wr_t wx, *pwx = &wx; /* result is assembled in a local wr_t */
+    switch (df) {
+    case DF_BYTE:
+        /* byte data format; a selector with bits 7:6 set yields 0 */
+        ALL_B_ELEMENTS(i, MSA_WRLEN) {
+            k = (B(pwd, i) & 0x3f) % (2 * n);
+            B(pwx, i) =
+                (B(pwd, i) & 0xc0) ? 0 : k < n ? B(pwt, k) : B(pws, k - n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        /* half data format; same selector rules as the byte case */
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            k = (H(pwd, i) & 0x3f) % (2 * n);
+            H(pwx, i) =
+                (H(pwd, i) & 0xc0) ? 0 : k < n ? H(pwt, k) : H(pws, k - n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        /* word data format; same selector rules as the byte case */
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            k = (W(pwd, i) & 0x3f) % (2 * n);
+            W(pwx, i) =
+                (W(pwd, i) & 0xc0) ? 0 : k < n ? W(pwt, k) : W(pws, k - n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        /* double data format; same selector rules as the byte case */
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            k = (D(pwd, i) & 0x3f) % (2 * n);
+            D(pwx, i) =
+                (D(pwd, i) & 0xc0) ? 0 : k < n ? D(pwt, k) : D(pws, k - n);
+        } DONE_ALL_ELEMENTS;
+       break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    msa_move_v(pwd, pwx); /* pwd is read above, so it is written only now */
+}
+
+void helper_msa_vshf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt) /* helper entry: VSHF.df on wd, ws, wt */
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    msa_vshf_df(env, df, pwd, pws, pwt);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+
+#define SHF_POS(i, imm) ((i & 0xfc) + ((imm >> (2 * (i & 0x03))) & 0x03))
+
+static inline void msa_shf_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, uint32_t imm) /* SHF.df: shuffle in groups of four */
+{
+    wr_t wx, *pwx = &wx; /* result is assembled in a local wr_t */
+    switch (df) {
+    case DF_BYTE:
+      ALL_B_ELEMENTS(i, MSA_WRLEN) {
+        B(pwx, i) = B(pws, SHF_POS(i, imm)); /* 2 bits of imm per slot */
+      } DONE_ALL_ELEMENTS;
+      break;
+    case DF_HALF:
+      ALL_H_ELEMENTS(i, MSA_WRLEN) {
+        H(pwx, i) = H(pws, SHF_POS(i, imm));
+      } DONE_ALL_ELEMENTS;
+      break;
+    case DF_WORD:
+      ALL_W_ELEMENTS(i, MSA_WRLEN) {
+        W(pwx, i) = W(pws, SHF_POS(i, imm));
+      } DONE_ALL_ELEMENTS;
+      break;
+    default:
+        /* shouldn't get here (there is no DF_DOUBLE case for this op) */
+        assert(0);
+    }
+    msa_move_v(pwd, pwx); /* presumably copies wx into *pwd; done last */
+}
+
+void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t imm) /* helper entry: wd = SHF.df(ws, imm) */
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    msa_shf_df(env, df, pwd, pws, imm);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+static inline int64_t msa_maddv_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2) /* wraps modulo 2^64 */
+{
+    return (int64_t)((uint64_t)dest + (uint64_t)arg1 * (uint64_t)arg2);
+}
+
+void helper_msa_maddv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = wd[i] + ws[i] * wt[i] (see msa_maddv_df). */
+    int64_t td, ts, tt;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_maddv_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+static inline int64_t msa_msubv_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2) /* wraps modulo 2^64 */
+{
+    return (int64_t)((uint64_t)dest - (uint64_t)arg1 * (uint64_t)arg2);
+}
+
+void helper_msa_msubv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = wd[i] - ws[i] * wt[i] (see msa_msubv_df). */
+    int64_t td, ts, tt;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_msubv_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+static inline int64_t msa_max_a_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2) /* larger |arg| wins; arg2 on ties */
+{
+    uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -(uint64_t)arg1; /* no overflow */
+    uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -(uint64_t)arg2; /* at INT64_MIN */
+    return abs_arg1 > abs_arg2 ? arg1 : arg2;
+}
+
+void helper_msa_max_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = larger-magnitude one of ws[i], wt[i]. */
+    int64_t td, ts, tt; /* signed, matching the s64 loader used below */
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_max_a_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+static inline int64_t msa_max_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return (arg1 <= arg2) ? arg2 : arg1; /* signed maximum */
+}
+
+void helper_msa_max_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = signed max(ws[i], wt[i]). */
+    int64_t td, ts, tt;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_max_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+void helper_msa_maxi_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    /* Per element: wd[i] = signed max(ws[i], s5), s5 being an immediate. */
+    int64_t td, ts;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_max_s_df(env, df, ts, s5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+static inline int64_t msa_max_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    uint64_t lhs = UNSIGNED(arg1, df); /* compare the UNSIGNED() views ... */
+    uint64_t rhs = UNSIGNED(arg2, df);
+    return (lhs <= rhs) ? arg2 : arg1; /* ... but return the raw operand */
+}
+
+void helper_msa_max_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = unsigned max(ws[i], wt[i]). */
+    uint64_t td, ts, tt;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_max_u_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+void helper_msa_maxi_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    /* Per element: wd[i] = unsigned max(ws[i], u5), u5 being an immediate. */
+    uint64_t td, ts;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_max_u_df(env, df, ts, u5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+static inline int64_t msa_min_a_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2) /* smaller |arg| wins; arg2 on ties */
+{
+    uint64_t abs_arg1 = arg1 >= 0 ? arg1 : -(uint64_t)arg1; /* no overflow */
+    uint64_t abs_arg2 = arg2 >= 0 ? arg2 : -(uint64_t)arg2; /* at INT64_MIN */
+    return abs_arg1 < abs_arg2 ? arg1 : arg2;
+}
+
+void helper_msa_min_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = smaller-magnitude one of ws[i], wt[i]. */
+    int64_t td, ts, tt; /* signed, matching the s64 loader used below */
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_min_a_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+static inline int64_t msa_min_s_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    return (arg2 <= arg1) ? arg2 : arg1; /* signed minimum */
+}
+
+void helper_msa_min_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = signed min(ws[i], wt[i]). */
+    int64_t td, ts, tt;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_min_s_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+void helper_msa_mini_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t s5)
+{
+    /* Per element: wd[i] = signed min(ws[i], s5), s5 being an immediate. */
+    int64_t td, ts;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_min_s_df(env, df, ts, s5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+static inline int64_t msa_min_u_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    uint64_t lhs = UNSIGNED(arg1, df); /* compare the UNSIGNED() views ... */
+    uint64_t rhs = UNSIGNED(arg2, df);
+    return (lhs >= rhs) ? arg2 : arg1; /* ... but return the raw operand */
+}
+
+void helper_msa_min_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = unsigned min(ws[i], wt[i]). */
+    uint64_t td, ts, tt;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_min_u_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+void helper_msa_mini_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, int64_t u5)
+{
+    /* Per element: wd[i] = unsigned min(ws[i], u5), u5 being an immediate. */
+    uint64_t td, ts;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_i64(env, ws, df, i);
+        td = msa_min_u_df(env, df, ts, u5);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
+static inline void msa_splat_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, target_ulong rt) /* SPLAT.df: broadcast ws[n] into wd */
+{
+    uint32_t n = rt % DF_ELEMENTS(df, MSA_WRLEN); /* wrapped index */
+    msa_check_index(env, df, n); /* NOTE(review): n is already reduced */
+    switch (df) { /* pwd is written in place, without a scratch copy */
+    case DF_BYTE:
+        ALL_B_ELEMENTS(i, MSA_WRLEN) {
+            B(pwd, i)   = B(pws, n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            H(pwd, i)   = H(pws, n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            W(pwd, i)   = W(pws, n);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            D(pwd, i)   = D(pws, n);
+        } DONE_ALL_ELEMENTS;
+       break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+}
+
+void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t rt)
 {
-    int64_t td, ts;
-    int i;
-    int df_bits = 8 * (1 << df);
-    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
-        ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_clt_u_df(env, df, ts, u5);
-        msa_store_wr_elem(env, td, wd, df, i);
-    }
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    msa_splat_df(env, df, pwd, pws, env->active_tc.gpr[rt]);
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-#define SHF_POS(i, imm) ((i & 0xfc) + ((imm >> (2 * (i & 0x03))) & 0x03))
-
-static inline void msa_shf_df(CPUMIPSState *env, uint32_t df, void *pwd,
-        void *pws, uint32_t imm)
+void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t s10)
 {
-    wr_t wx, *pwx = &wx;
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    int64_t s64 = ((int64_t)s10 << 54) >> 54;
     switch (df) {
     case DF_BYTE:
-      ALL_B_ELEMENTS(i, MSA_WRLEN) {
-        B(pwx, i) = B(pws, SHF_POS(i, imm));
-      } DONE_ALL_ELEMENTS;
-      break;
+        ALL_B_ELEMENTS(i, MSA_WRLEN) {
+            B(pwd, i)   = (int8_t)s10;
+        } DONE_ALL_ELEMENTS;
+        break;
     case DF_HALF:
-      ALL_H_ELEMENTS(i, MSA_WRLEN) {
-        H(pwx, i) = H(pws, SHF_POS(i, imm));
-      } DONE_ALL_ELEMENTS;
-      break;
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            H(pwd, i)   = (int16_t)s64;
+        } DONE_ALL_ELEMENTS;
+        break;
     case DF_WORD:
-      ALL_W_ELEMENTS(i, MSA_WRLEN) {
-        W(pwx, i) = W(pws, SHF_POS(i, imm));
-      } DONE_ALL_ELEMENTS;
-      break;
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            W(pwd, i)   = (int32_t)s64;
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            D(pwd, i)   = s64;
+        } DONE_ALL_ELEMENTS;
+       break;
     default:
         /* shouldn't get here */
         assert(0);
     }
-    msa_move_v(pwd, pwx);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
 }
 
-void helper_msa_shf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, uint32_t imm)
+void helper_msa_mulv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    void *pws = &(env->active_fpu.fpr[ws]);
-    msa_shf_df(env, df, pwd, pws, imm);
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = ts * tt;
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
         env->active_msa.msamodify |= (1 << wd);
     }
 }
 
-static inline int64_t msa_max_s_df(CPUMIPSState *env, uint32_t df,
+static inline int64_t msa_div_s_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
-    return arg1 > arg2 ? arg1 : arg2;
+    if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
+        return DF_MIN_INT(df);
+    }
+    return arg2 ? arg1 / arg2 : 0;
 }
 
-void helper_msa_maxi_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t s5)
+void helper_msa_div_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_max_s_df(env, df, ts, s5);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_div_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -637,23 +2220,24 @@ void helper_msa_maxi_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_max_u_df(CPUMIPSState *env, uint32_t df,
+static inline int64_t msa_div_u_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
     uint64_t u_arg2 = UNSIGNED(arg2, df);
-    return u_arg1 > u_arg2 ? arg1 : arg2;
+    return u_arg2 ? u_arg1 / u_arg2 : 0;
 }
 
-void helper_msa_maxi_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t u5)
+void helper_msa_div_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    uint64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_i64(env, ws, df, i);
-        td = msa_max_u_df(env, df, ts, u5);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_div_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -661,21 +2245,25 @@ void helper_msa_maxi_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_min_s_df(CPUMIPSState *env, uint32_t df,
+static inline int64_t msa_mod_s_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
-    return arg1 < arg2 ? arg1 : arg2;
+    if (arg1 == DF_MIN_INT(df) && arg2 == -1) {
+        return 0;
+    }
+    return arg2 ? arg1 % arg2 : 0;
 }
 
-void helper_msa_mini_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t s5)
+void helper_msa_mod_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    int64_t td, ts;
+    int64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_s64(env, ws, df, i);
-        td = msa_min_s_df(env, df, ts, s5);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_mod_s_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -683,23 +2271,24 @@ void helper_msa_mini_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-static inline int64_t msa_min_u_df(CPUMIPSState *env, uint32_t df,
+static inline int64_t msa_mod_u_df(CPUMIPSState *env, uint32_t df,
         int64_t arg1, int64_t arg2)
 {
     uint64_t u_arg1 = UNSIGNED(arg1, df);
     uint64_t u_arg2 = UNSIGNED(arg2, df);
-    return u_arg1 < u_arg2 ? arg1 : arg2;
+    return u_arg2 ? u_arg1 % u_arg2 : 0;
 }
 
-void helper_msa_mini_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t ws, int64_t u5)
+void helper_msa_mod_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
 {
-    uint64_t td, ts;
+    uint64_t td, ts, tt;
     int i;
     int df_bits = 8 * (1 << df);
     for (i = 0; i < MSA_WRLEN / df_bits; i++) {
         ts = msa_load_wr_elem_i64(env, ws, df, i);
-        td = msa_min_u_df(env, df, ts, u5);
+        tt = msa_load_wr_elem_i64(env, wt, df, i);
+        td = msa_mod_u_df(env, df, ts, tt);
         msa_store_wr_elem(env, td, wd, df, i);
     }
     if (env->active_msa.msair & MSAIR_WRP_BIT) {
@@ -707,41 +2296,6 @@ void helper_msa_mini_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
-void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
-        uint32_t s10)
-{
-    void *pwd = &(env->active_fpu.fpr[wd]);
-    int64_t s64 = ((int64_t)s10 << 54) >> 54;
-    switch (df) {
-    case DF_BYTE:
-        ALL_B_ELEMENTS(i, MSA_WRLEN) {
-            B(pwd, i)   = (int8_t)s10;
-        } DONE_ALL_ELEMENTS;
-        break;
-    case DF_HALF:
-        ALL_H_ELEMENTS(i, MSA_WRLEN) {
-            H(pwd, i)   = (int16_t)s64;
-        } DONE_ALL_ELEMENTS;
-        break;
-    case DF_WORD:
-        ALL_W_ELEMENTS(i, MSA_WRLEN) {
-            W(pwd, i)   = (int32_t)s64;
-        } DONE_ALL_ELEMENTS;
-        break;
-    case DF_DOUBLE:
-        ALL_D_ELEMENTS(i, MSA_WRLEN) {
-            D(pwd, i)   = s64;
-        } DONE_ALL_ELEMENTS;
-       break;
-    default:
-        /* shouldn't get here */
-        assert(0);
-    }
-    if (env->active_msa.msair & MSAIR_WRP_BIT) {
-        env->active_msa.msamodify |= (1 << wd);
-    }
-}
-
 static inline int64_t msa_sat_u_df(CPUMIPSState *env, uint32_t df, int64_t arg,
         uint32_t m)
 {
@@ -790,6 +2344,30 @@ void helper_msa_sat_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+static inline int64_t msa_sll_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2) /* arg1 shifted left by BIT_POSITION(arg2, df) */
+{
+    int32_t b_arg2 = BIT_POSITION(arg2, df);
+    return (int64_t)((uint64_t)arg1 << b_arg2); /* no signed-shift UB */
+}
+
+void helper_msa_sll_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = msa_sll_df(ws[i], wt[i]), a left shift. */
+    int64_t td, ts, tt;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_sll_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
 void helper_msa_slli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, uint32_t m)
 {
@@ -806,6 +2384,30 @@ void helper_msa_slli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+static inline int64_t msa_sra_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    const int32_t shift = BIT_POSITION(arg2, df); /* count taken from arg2 */
+    return arg1 >> shift; /* signed operand, so the sign bit is kept */
+}
+
+void helper_msa_sra_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = msa_sra_df(ws[i], wt[i]), a signed right shift. */
+    int64_t td, ts, tt;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_sra_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
 void helper_msa_srai_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, uint32_t m)
 {
@@ -822,6 +2424,31 @@ void helper_msa_srai_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+static inline int64_t msa_srl_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    const uint64_t value = UNSIGNED(arg1, df); /* shift the unsigned view */
+    const int32_t shift = BIT_POSITION(arg2, df);
+    return value >> shift;
+}
+
+void helper_msa_srl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = msa_srl_df(ws[i], wt[i]), unsigned right shift. */
+    int64_t td, ts, tt;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_srl_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
 static inline int64_t msa_srli_df(CPUMIPSState *env, uint32_t df, int64_t arg,
         uint32_t m)
 {
@@ -845,6 +2472,35 @@ void helper_msa_srli_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+static inline int64_t msa_srar_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    /* Signed shift right, rounded by adding the last bit shifted out. */
+    const int32_t shift = BIT_POSITION(arg2, df);
+    int64_t round = 0;
+    if (shift != 0) {
+        round = (arg1 >> (shift - 1)) & 1;
+    }
+    return (arg1 >> shift) + round;
+}
+
+void helper_msa_srar_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = msa_srar_df(ws[i], wt[i]), rounded signed shift. */
+    int64_t td, ts, tt;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_srar_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
 static inline int64_t msa_srari_df(CPUMIPSState *env, uint32_t df, int64_t arg,
         uint32_t m)
 {
@@ -872,6 +2528,36 @@ void helper_msa_srari_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+static inline int64_t msa_srlr_df(CPUMIPSState *env, uint32_t df, int64_t arg1,
+        int64_t arg2)
+{
+    /* Unsigned shift right, rounded by adding the last bit shifted out. */
+    const uint64_t value = UNSIGNED(arg1, df);
+    const int32_t shift = BIT_POSITION(arg2, df);
+    uint64_t round = 0;
+    if (shift != 0) {
+        round = (value >> (shift - 1)) & 1;
+    }
+    return (value >> shift) + round;
+}
+
+void helper_msa_srlr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    /* Per element: wd[i] = msa_srlr_df(ws[i], wt[i]), rounded unsigned. */
+    int64_t td, ts, tt;
+    int i, df_bits = 8 * (1 << df);
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_srlr_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
+
 static inline int64_t msa_srlri_df(CPUMIPSState *env, uint32_t df, int64_t arg,
         uint32_t m)
 {
@@ -899,3 +2585,56 @@ void helper_msa_srlri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         env->active_msa.msamodify |= (1 << wd);
     }
 }
+static inline void msa_sld_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, target_ulong rt) /* SLD.df: slide ws:wd bytes into wd */
+{
+    uint32_t n = rt % DF_ELEMENTS(df, MSA_WRLEN); /* wrapped slide amount */
+    uint8_t v[64]; /* scratch: one ws chunk followed by one wd chunk */
+    uint32_t i, k; /* i is used inside the macro, k selects the chunk */
+#define CONCATENATE_AND_SLIDE(s, k)             \
+    do {                                        \
+        for (i = 0; i < s; i++) {               \
+            v[i]     = B(pws, s * k + i);       \
+            v[i + s] = B(pwd, s * k + i);       \
+        }                                       \
+        for (i = 0; i < s; i++) {               \
+            B(pwd, s * k + i) = v[i + n];       \
+        }                                       \
+    } while (0) /* NOTE(review): uses n, i, v from scope; never #undef'd */
+
+    msa_check_index(env, df, n); /* NOTE(review): n is already reduced */
+    switch (df) {
+    case DF_BYTE:
+        CONCATENATE_AND_SLIDE(MSA_WRLEN/8, 0); /* one chunk of s bytes */
+        break;
+    case DF_HALF:
+        for (k = 0; k < 2; k++) { /* two chunks, each slid independently */
+            CONCATENATE_AND_SLIDE(MSA_WRLEN/16, k);
+        }
+        break;
+    case DF_WORD:
+        for (k = 0; k < 4; k++) { /* four chunks */
+            CONCATENATE_AND_SLIDE(MSA_WRLEN/32, k);
+        }
+        break;
+    case DF_DOUBLE:
+        for (k = 0; k < 8; k++) { /* eight chunks */
+            CONCATENATE_AND_SLIDE(MSA_WRLEN/64, k);
+        }
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+}
+
+void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t rt) /* helper entry: SLD.df, amount in GPR rt */
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    msa_sld_df(env, df, pwd, pws, env->active_tc.gpr[rt]);
+    if (env->active_msa.msair & MSAIR_WRP_BIT) { /* record wd as modified */
+        env->active_msa.msamodify |= (1u << wd); /* 1u: wd may be 31 */
+    }
+}
diff --git a/target-mips/translate.c b/target-mips/translate.c
index f97d3a9..e063531 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -15030,6 +15030,295 @@ static void gen_msa_bit(CPUMIPSState *env, DisasContext *ctx)
     tcg_temp_free_i32(tws);
 }
 
+/*
+ * Translate an MSA 3R-format instruction.
+ *
+ * df, wt, ws and wd are decoded from ctx->opcode and passed as constant
+ * TCG operands to the helper selected by MASK_MSA_3R().  Encodings that
+ * match no case raise EXCP_RI.  The four TCG constants are released on
+ * every path through the function.
+ */
+static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx)
+{
+/* minor opcode plus the three operation bits [25:23] */
+#define MASK_MSA_3R(op)    (MASK_MSA_MINOR(op) | (op & (0x7 << 23)))
+    uint32_t opcode = ctx->opcode;
+
+    uint8_t df = (ctx->opcode >> 21) & 0x3 /* df [22:21] */;
+    uint8_t wt = (ctx->opcode >> 16) & 0x1f /* wt [20:16] */;
+    uint8_t ws = (ctx->opcode >> 11) & 0x1f /* ws [15:11] */;
+    uint8_t wd = (ctx->opcode >> 6) & 0x1f /* wd [10:6] */;
+
+    TCGv_i32 tdf = tcg_const_i32(df);
+    TCGv_i32 twd = tcg_const_i32(wd);
+    TCGv_i32 tws = tcg_const_i32(ws);
+    TCGv_i32 twt = tcg_const_i32(wt);
+
+    switch (MASK_MSA_3R(opcode)) {
+    case OPC_MSA_SLL_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_sll_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ADDV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_addv_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_CEQ_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ceq_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ADD_A_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_add_a_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SUBS_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_subs_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MULV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_mulv_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SLD_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_sld_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_VSHF_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_vshf_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SRA_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_sra_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SUBV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_subv_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ADDS_A_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_adds_a_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SUBS_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_subs_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MADDV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_maddv_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SPLAT_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_splat_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SRAR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_srar_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SRL_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_srl_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MAX_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_max_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_CLT_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_clt_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ADDS_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_adds_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SUBSUS_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_subsus_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MSUBV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_msubv_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_PCKEV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_pckev_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SRLR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_srlr_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_BCLR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_bclr_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MAX_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_max_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_CLT_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_clt_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ADDS_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_adds_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_SUBSUU_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_subsuu_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_PCKOD_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_pckod_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_BSET_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_bset_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MIN_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_min_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_CLE_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_cle_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_AVE_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ave_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ASUB_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_asub_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_DIV_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_div_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ILVL_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ilvl_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_BNEG_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_bneg_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MIN_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_min_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_CLE_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_cle_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_AVE_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ave_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ASUB_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_asub_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_DIV_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_div_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ILVR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ilvr_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_BINSL_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_binsl_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MAX_A_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_max_a_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_AVER_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_aver_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MOD_S_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_mod_s_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ILVEV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ilvev_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_BINSR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_binsr_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MIN_A_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_min_a_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_AVER_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_aver_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_MOD_U_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_mod_u_df(cpu_env, tdf, twd, tws, twt);
+        break;
+    case OPC_MSA_ILVOD_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ilvod_df(cpu_env, tdf, twd, tws, twt);
+        break;
+
+    case OPC_MSA_DOTP_S_df:
+    case OPC_MSA_DOTP_U_df:
+    case OPC_MSA_DPADD_S_df:
+    case OPC_MSA_DPADD_U_df:
+    case OPC_MSA_DPSUB_S_df:
+    case OPC_MSA_HADD_S_df:
+    case OPC_MSA_DPSUB_U_df:
+    case OPC_MSA_HADD_U_df:
+    case OPC_MSA_HSUB_S_df:
+    case OPC_MSA_HSUB_U_df:
+        if (df == 0) {
+            if (check_msa_access(env, ctx, -1, -1, -1)) {
+                generate_exception(ctx, EXCP_RI);
+            }
+            /*
+             * df == 0 is rejected for this group: stop here so that no
+             * second access check or helper call is emitted behind the
+             * exception.
+             */
+            break;
+        }
+        check_msa_access(env, ctx, wt, ws, wd);
+        /* second dispatch on the same mask to pick the group member */
+        switch (MASK_MSA_3R(opcode)) {
+        case OPC_MSA_DOTP_S_df:
+            gen_helper_msa_dotp_s_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_DOTP_U_df:
+            gen_helper_msa_dotp_u_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_DPADD_S_df:
+            gen_helper_msa_dpadd_s_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_DPADD_U_df:
+            gen_helper_msa_dpadd_u_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_DPSUB_S_df:
+            gen_helper_msa_dpsub_s_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_HADD_S_df:
+            gen_helper_msa_hadd_s_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_DPSUB_U_df:
+            gen_helper_msa_dpsub_u_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_HADD_U_df:
+            gen_helper_msa_hadd_u_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_HSUB_S_df:
+            gen_helper_msa_hsub_s_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        case OPC_MSA_HSUB_U_df:
+            gen_helper_msa_hsub_u_df(cpu_env, tdf, twd, tws, twt);
+            break;
+        }
+        break;
+    default:
+        MIPS_INVAL("MSA instruction");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+    tcg_temp_free_i32(twd);
+    tcg_temp_free_i32(tws);
+    tcg_temp_free_i32(twt);
+    tcg_temp_free_i32(tdf);
+}
+
 static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
 {
     uint32_t opcode = ctx->opcode;
@@ -15049,6 +15338,17 @@ static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
     case OPC_MSA_BIT_0A:
         gen_msa_bit(env, ctx);
         break;
+    case OPC_MSA_3R_0D:
+    case OPC_MSA_3R_0E:
+    case OPC_MSA_3R_0F:
+    case OPC_MSA_3R_10:
+    case OPC_MSA_3R_11:
+    case OPC_MSA_3R_12:
+    case OPC_MSA_3R_13:
+    case OPC_MSA_3R_14:
+    case OPC_MSA_3R_15:
+        gen_msa_3r(env, ctx);
+        break;
     default:
         MIPS_INVAL("MSA instruction");
         generate_exception(ctx, EXCP_RI);
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 14/20] target-mips: add MSA ELM format instructions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (12 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 13/20] target-mips: add MSA 3R " Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 15/20] target-mips: add MSA 3RF " Yongbok Kim
                   ` (5 subsequent siblings)
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA ELM format instructions

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.h     |    9 ++
 target-mips/msa_helper.c |  239 ++++++++++++++++++++++++++++++++++++++++++++++
 target-mips/translate.c  |  136 ++++++++++++++++++++++++++
 3 files changed, 384 insertions(+), 0 deletions(-)

diff --git a/target-mips/helper.h b/target-mips/helper.h
index 00705c4..e13daec 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -720,6 +720,7 @@ DEF_HELPER_5(msa_bset_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_bseti_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_ceq_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_ceqi_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_2(msa_cfcmsa, tl, env, i32)
 DEF_HELPER_5(msa_cle_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_cle_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_clei_s_df, void, env, i32, i32, i32, s64)
@@ -728,6 +729,9 @@ DEF_HELPER_5(msa_clt_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_clt_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_clti_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_clti_u_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_5(msa_copy_s_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_copy_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_3(msa_ctcmsa, void, env, tl, i32)
 DEF_HELPER_5(msa_div_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_div_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_dotp_s_df, void, env, i32, i32, i32, i32)
@@ -744,6 +748,8 @@ DEF_HELPER_5(msa_ilvev_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_ilvl_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_ilvod_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_ilvr_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_insert_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_insve_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_ldi_df, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_maddv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_max_a_df, void, env, i32, i32, i32, i32)
@@ -758,6 +764,7 @@ DEF_HELPER_5(msa_mini_s_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_mini_u_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_mod_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_mod_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_3(msa_move_v, void, env, i32, i32)
 DEF_HELPER_5(msa_msubv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_mulv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_nori_b, void, env, i32, i32, i32)
@@ -768,9 +775,11 @@ DEF_HELPER_5(msa_sat_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_sat_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_shf_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_sld_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_sldi_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_sll_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_slli_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_splat_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_splati_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_sra_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srai_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srar_df, void, env, i32, i32, i32, i32)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index bb4ea65..220a0cd 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -1166,6 +1166,59 @@ void helper_msa_clti_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+/*
+ * COPY_S.df helper: store element n of vector register ws into GPR rd.
+ *
+ * n is first reduced modulo DF_ELEMENTS(df, MSA_WRLEN).  The element is cast
+ * to the signed type of its own width before the store.  The DF_DOUBLE case
+ * is compiled only when TARGET_MIPS64 is defined; without it, df == DF_DOUBLE
+ * reaches the default assert(0).
+ */
+void helper_msa_copy_s_df(CPUMIPSState *env, uint32_t df, uint32_t rd,
+        uint32_t ws, uint32_t n)
+{
+    n %= DF_ELEMENTS(df, MSA_WRLEN);
+    msa_check_index(env, (uint32_t)df, (uint32_t)n);
+    switch (df) {
+    case DF_BYTE: /* b */
+        env->active_tc.gpr[rd] = (int8_t)env->active_fpu.fpr[ws].wr.b[n];
+        break;
+    case DF_HALF: /* h */
+        env->active_tc.gpr[rd] = (int16_t)env->active_fpu.fpr[ws].wr.h[n];
+        break;
+    case DF_WORD: /* w */
+        env->active_tc.gpr[rd] = (int32_t)env->active_fpu.fpr[ws].wr.w[n];
+        break;
+#ifdef TARGET_MIPS64
+    case DF_DOUBLE: /* d */
+        env->active_tc.gpr[rd] = (int64_t)env->active_fpu.fpr[ws].wr.d[n];
+        break;
+#endif
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+}
+
+/*
+ * COPY_U.df helper: store element n of vector register ws into GPR rd.
+ *
+ * Same shape as helper_msa_copy_s_df(), except that the element is cast to
+ * the unsigned type of its own width before the store.  n is first reduced
+ * modulo DF_ELEMENTS(df, MSA_WRLEN).  The DF_DOUBLE case is compiled only
+ * when TARGET_MIPS64 is defined; without it, df == DF_DOUBLE reaches the
+ * default assert(0).
+ */
+void helper_msa_copy_u_df(CPUMIPSState *env, uint32_t df, uint32_t rd,
+        uint32_t ws, uint32_t n)
+{
+    n %= DF_ELEMENTS(df, MSA_WRLEN);
+    msa_check_index(env, (uint32_t)df, (uint32_t)n);
+    switch (df) {
+    case DF_BYTE: /* b */
+        env->active_tc.gpr[rd] = (uint8_t)env->active_fpu.fpr[ws].wr.b[n];
+        break;
+    case DF_HALF: /* h */
+        env->active_tc.gpr[rd] = (uint16_t)env->active_fpu.fpr[ws].wr.h[n];
+        break;
+    case DF_WORD: /* w */
+        env->active_tc.gpr[rd] = (uint32_t)env->active_fpu.fpr[ws].wr.w[n];
+        break;
+#ifdef TARGET_MIPS64
+    case DF_DOUBLE: /* d */
+        env->active_tc.gpr[rd] = (uint64_t)env->active_fpu.fpr[ws].wr.d[n];
+        break;
+#endif
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+}
+
+
 #define SIGNED_EVEN(a, df) \
         ((((int64_t)(a)) << (64 - DF_BITS(df)/2)) >> (64 - DF_BITS(df)/2))
 #define UNSIGNED_EVEN(a, df) \
@@ -2142,6 +2195,27 @@ void helper_msa_splat_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+/*
+ * SPLATI.df helper: calls msa_splat_df() with the immediate n where
+ * helper_msa_splat_df() would supply a register value.  wd and ws index
+ * env->active_fpu.fpr.
+ */
+void helper_msa_splati_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t n)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    msa_splat_df(env, df, pwd, pws, n);
+    /* when MSAIR_WRP_BIT is set, record wd as written in MSAModify */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * MOVE.V helper: hands the destination (wd) and source (ws) vector registers
+ * of env->active_fpu.fpr to msa_move_v().  Takes no data format argument.
+ */
+void helper_msa_move_v(CPUMIPSState *env, uint32_t wd, uint32_t ws)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    msa_move_v(pwd, pws);
+    /* when MSAIR_WRP_BIT is set, record wd as written in MSAModify */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
 void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t s10)
 {
@@ -2177,6 +2251,73 @@ void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+/*
+ * Write the value rs, narrowed to the element width chosen by df, into
+ * element n of the vector register at pwd.  Other elements are not touched.
+ *
+ * n is passed to msa_check_index() but, unlike in the COPY helpers, is not
+ * reduced modulo the element count here, and the DF_DOUBLE case is not
+ * conditional on TARGET_MIPS64.  An unknown df trips assert(0).
+ */
+static inline void msa_insert_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        target_ulong rs, uint32_t n)
+{
+    msa_check_index(env, df, n);
+    switch (df) {
+    case DF_BYTE:
+        B(pwd, n)   = (int8_t)rs;
+        break;
+    case DF_HALF:
+        H(pwd, n)   = (int16_t)rs;
+        break;
+    case DF_WORD:
+        W(pwd, n)   = (int32_t)rs;
+        break;
+    case DF_DOUBLE:
+        D(pwd, n)   = (int64_t)rs;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+}
+
+/*
+ * INSERT.df helper.  wd indexes env->active_fpu.fpr; rs indexes
+ * env->active_tc.gpr, whose value is inserted at element n by
+ * msa_insert_df().
+ */
+void helper_msa_insert_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t rs, uint32_t n)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    msa_insert_df(env, df, pwd, env->active_tc.gpr[rs], n);
+    /* when MSAIR_WRP_BIT is set, record wd as written in MSAModify */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * Copy element 0 of the vector register at pws into element n of the vector
+ * register at pwd, using the element width chosen by df.  Other elements of
+ * pwd are not touched.
+ *
+ * n is passed to msa_check_index() before use; an unknown df trips
+ * assert(0).
+ */
+static inline void msa_insve_df(CPUMIPSState *env, uint32_t df, void *pwd,
+        void *pws, uint32_t n)
+{
+    msa_check_index(env, df, n);
+    switch (df) {
+    case DF_BYTE:
+        B(pwd, n)   = (int8_t)B(pws, 0);
+        break;
+    case DF_HALF:
+        H(pwd, n)   = (int16_t)H(pws, 0);
+        break;
+    case DF_WORD:
+        W(pwd, n)   = (int32_t)W(pws, 0);
+        break;
+    case DF_DOUBLE:
+        D(pwd, n)   = (int64_t)D(pws, 0);
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+}
+
+/*
+ * INSVE.df helper.  wd and ws index env->active_fpu.fpr; the element copy
+ * itself is done by msa_insve_df().
+ */
+void helper_msa_insve_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t n)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    msa_insve_df(env, df, pwd, pws, n);
+    /* when MSAIR_WRP_BIT is set, record wd as written in MSAModify */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
 void helper_msa_mulv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, uint32_t wt)
 {
@@ -2638,3 +2779,101 @@ void helper_msa_sld_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         env->active_msa.msamodify |= (1 << wd);
     }
 }
+
+/*
+ * SLDI.df helper: same as helper_msa_sld_df() except that the slide amount
+ * passed to msa_sld_df() is the immediate n instead of a GPR value.
+ */
+void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t n)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+
+    msa_sld_df(env, df, pwd, pws, n);
+
+    /* when MSAIR_WRP_BIT is set, record wd as written in MSAModify */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1 << wd);
+    }
+}
+
+/*
+ * CFCMSA helper: return the value of the MSA control register selected by
+ * cs.
+ *
+ * MSAIR is returned as stored and MSACSR masked with MSACSR_BITS.  The
+ * remaining registers are readable only when MSAIR_WRP_BIT is set in MSAIR.
+ * Every other case (unknown cs, or a WRP-only register with the bit clear)
+ * returns 0.
+ */
+target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
+{
+    /* registers that are always readable */
+    switch (cs) {
+    case MSAIR_REGISTER:
+        return env->active_msa.msair;
+    case MSACSR_REGISTER:
+        return env->active_msa.msacsr & MSACSR_BITS;
+    }
+
+    /* registers that are readable only with MSAIR_WRP_BIT set */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        switch (cs) {
+        case MSAACCESS_REGISTER:
+            return env->active_msa.msaaccess;
+        case MSASAVE_REGISTER:
+            return env->active_msa.msasave;
+        case MSAMODIFY_REGISTER:
+            return env->active_msa.msamodify;
+        case MSAREQUEST_REGISTER:
+            return env->active_msa.msarequest;
+        case MSAMAP_REGISTER:
+            return env->active_msa.msamap;
+        case MSAUNMAP_REGISTER:
+            return env->active_msa.msaunmap;
+        }
+    }
+    return 0;
+}
+
+/*
+ * CTCMSA helper: write the value elm to the MSA control register selected
+ * by cd.
+ *
+ * Writes to MSAIR, MSAACCESS, MSAREQUEST and to any cd not listed are
+ * ignored.  MSACSR keeps only the MSACSR_BITS of elm, reprograms the
+ * rounding and flush modes of active_msa.fp_status, and raises EXCP_MSAFPE
+ * when a cause bit is set that is either enabled or FP_UNIMPLEMENTED.
+ * MSASAVE, MSAMODIFY, MSAMAP and MSAUNMAP are written only when
+ * MSAIR_WRP_BIT is set in MSAIR; MSAMAP and MSAUNMAP additionally set or
+ * clear one bit of MSAACCESS.
+ */
+void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
+{
+    switch (cd) {
+    case MSAIR_REGISTER:
+        break;
+    case MSACSR_REGISTER:
+        env->active_msa.msacsr = (int32_t)elm & MSACSR_BITS;
+
+        /* set float_status rounding mode */
+        set_float_rounding_mode(
+            ieee_rm[(env->active_msa.msacsr & MSACSR_RM_MASK) >> MSACSR_RM_POS],
+            &env->active_msa.fp_status);
+
+        /* set float_status flush modes */
+        set_flush_to_zero(
+          (env->active_msa.msacsr & MSACSR_FS_BIT) != 0 ? 1 : 0,
+          &env->active_msa.fp_status);
+        set_flush_inputs_to_zero(
+          (env->active_msa.msacsr & MSACSR_FS_BIT) != 0 ? 1 : 0,
+          &env->active_msa.fp_status);
+
+        /* check exception */
+        if ((GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED)
+            & GET_FP_CAUSE(env->active_msa.msacsr)) {
+            helper_raise_exception(env, EXCP_MSAFPE);
+        }
+        break;
+    case MSAACCESS_REGISTER:
+        break;
+    case MSASAVE_REGISTER:
+        if (env->active_msa.msair & MSAIR_WRP_BIT) {
+            env->active_msa.msasave = (int32_t)elm;
+        }
+        break;
+    case MSAMODIFY_REGISTER:
+        if (env->active_msa.msair & MSAIR_WRP_BIT) {
+            env->active_msa.msamodify = (int32_t)elm;
+        }
+        break;
+    case MSAREQUEST_REGISTER:
+        break;
+    case MSAMAP_REGISTER:
+        if (env->active_msa.msair & MSAIR_WRP_BIT) {
+            env->active_msa.msamap = (int32_t)elm;
+            /*
+             * elm comes straight from a guest GPR.  Only its low five bits
+             * select the MSAACCESS bit: a shift count outside 0..31 would be
+             * undefined behaviour, as would shifting a signed 1 into bit 31.
+             */
+            env->active_msa.msaaccess |= 1u << (elm & 0x1f);
+        }
+        break;
+    case MSAUNMAP_REGISTER:
+        if (env->active_msa.msair & MSAIR_WRP_BIT) {
+            env->active_msa.msaunmap = (int32_t)elm;
+            /* same five-bit masking as for MSAMAP above */
+            env->active_msa.msaaccess &= ~(1u << (elm & 0x1f));
+        }
+        break;
+    }
+}
diff --git a/target-mips/translate.c b/target-mips/translate.c
index e063531..6c8caa4 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -15319,6 +15319,139 @@ static void gen_msa_3r(CPUMIPSState *env, DisasContext *ctx)
     tcg_temp_free_i32(tdf);
 }
 
+/*
+ * Translate an MSA ELM-format instruction.
+ *
+ * The 6-bit dfn field [21:16] encodes both the data format and the element
+ * index n.  dfn == 0x3E selects CTCMSA, CFCMSA and MOVE.V, which carry no
+ * element index and are handled with df set to the marker value 4.  A dfn
+ * that fits none of the patterns, or an n beyond the element count for the
+ * format, is rejected: EXCP_RI is generated if check_msa_access() returns
+ * non-zero.
+ */
+static void gen_msa_elm(CPUMIPSState *env, DisasContext *ctx)
+{
+#define MASK_MSA_ELM(op)    (MASK_MSA_MINOR(op) | (op & (0xf << 22)))
+#define MASK_MSA_ELM_DF3E(op)   (MASK_MSA_MINOR(op) | (op & (0x3FF << 16)))
+    uint32_t opcode = ctx->opcode;
+
+    uint8_t dfn = (ctx->opcode >> 16) & 0x3f /* dfn [21:16] */;
+
+    uint32_t df = 0, n = 0;
+
+    /* the leading bits of dfn pick the format, the rest are the index */
+    if ((dfn & 0x20) == 0x00) {         /* byte data format */
+        n = dfn & 0x1f;
+        df = 0;
+    } else if ((dfn & 0x30) == 0x20) {  /* half data format */
+        n = dfn & 0x0f;
+        df = 1;
+    } else if ((dfn & 0x38) == 0x30) {  /* word data format */
+        n = dfn & 0x07;
+        df = 2;
+    } else if ((dfn & 0x3c) == 0x38) {  /* double data format */
+        n = dfn & 0x3;
+        df = 3;
+    } else if (dfn == 0x3E) {  /* CTCMSA, CFCMSA, MOVE.V */
+        df = 4;
+    } else {
+        if (check_msa_access(env, ctx, -1, -1, -1)) {
+            generate_exception(ctx, EXCP_RI);
+        }
+        /* nothing has been allocated yet, so returning here leaks nothing */
+        return;
+    }
+
+    if (df == 4) {
+        uint8_t source = (ctx->opcode >> 11) & 0x1f /* rs/cs/ws [15:11] */;
+        uint8_t dest = (ctx->opcode >> 6) & 0x1f /* cd/rd/wd [10:6] */;
+        TCGv telm = tcg_temp_new();
+        TCGv_i32 tsr = tcg_const_i32(source);
+        TCGv_i32 tdt = tcg_const_i32(dest);
+
+        switch (MASK_MSA_ELM_DF3E(opcode)) {
+        case OPC_MSA_CTCMSA:
+            {
+                /* GPR[source] -> MSA control register dest */
+                check_msa_access(env, ctx, -1, -1, -1);
+                gen_load_gpr(telm, source);
+                gen_helper_msa_ctcmsa(cpu_env, telm, tdt);
+            }
+            break;
+        case OPC_MSA_CFCMSA:
+            {
+                /* MSA control register source -> GPR[dest] */
+                check_msa_access(env, ctx, -1, -1, -1);
+                gen_helper_msa_cfcmsa(telm, cpu_env, tsr);
+                gen_store_gpr(telm, dest);
+            }
+            break;
+        case OPC_MSA_MOVE_V:
+            {
+                /*
+                 * NOTE(review): source and dest are vector registers here,
+                 * yet -1 is passed for all three register arguments, unlike
+                 * the element cases below which pass ws and wd -- confirm
+                 * this is intended.
+                 */
+                check_msa_access(env, ctx, -1, -1, -1);
+                gen_helper_msa_move_v(cpu_env, tdt, tsr);
+            }
+            break;
+        default:
+            MIPS_INVAL("MSA instruction");
+            generate_exception(ctx, EXCP_RI);
+            break;
+        }
+
+        tcg_temp_free(telm);
+        tcg_temp_free_i32(tdt);
+        tcg_temp_free_i32(tsr);
+    } else {
+        /* element width in bits: 8, 16, 32 or 64 for df 0..3 */
+        int df_bits = 8 * (1 << df);
+        if (n >= MSA_WRLEN / df_bits) {
+            /* index beyond the number of elements of this format */
+            if (check_msa_access(env, ctx, -1, -1, -1)) {
+                generate_exception(ctx, EXCP_RI);
+            }
+        } else {
+            uint8_t ws = (ctx->opcode >> 11) & 0x1f /* ws [15:11] */;
+            uint8_t wd = (ctx->opcode >> 6) & 0x1f /* wd [10:6] */;
+
+            TCGv_i32 tws = tcg_const_i32(ws);
+            TCGv_i32 twd = tcg_const_i32(wd);
+            TCGv_i32 tn  = tcg_const_i32(n);
+            TCGv_i32 tdf = tcg_const_i32(df);
+
+            switch (MASK_MSA_ELM(opcode)) {
+            case OPC_MSA_SLDI_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_sldi_df(cpu_env, tdf, twd, tws, tn);
+                break;
+            case OPC_MSA_SPLATI_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_splati_df(cpu_env, tdf, twd, tws, tn);
+                break;
+            case OPC_MSA_INSVE_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_insve_df(cpu_env, tdf, twd, tws, tn);
+                break;
+            case OPC_MSA_COPY_S_df:
+            case OPC_MSA_COPY_U_df:
+            case OPC_MSA_INSERT_df:
+#if !defined(TARGET_MIPS64)
+                /* Double format valid only for MIPS64 */
+                if (df == 3) {
+                    if (check_msa_access(env, ctx, -1, -1, -1)) {
+                        generate_exception(ctx, EXCP_RI);
+                    }
+                    break;
+                }
+#endif
+                /*
+                 * NOTE(review): the COPY helpers take the [10:6] field as a
+                 * GPR index (rd) and the INSERT helper takes the [15:11]
+                 * field as a GPR index (rs), yet both fields are handed to
+                 * check_msa_access() in the same positions as the vector
+                 * registers of the cases above -- confirm this is intended.
+                 */
+                check_msa_access(env, ctx, -1, ws, wd);
+                switch (MASK_MSA_ELM(opcode)) {
+                case OPC_MSA_COPY_S_df:
+                    gen_helper_msa_copy_s_df(cpu_env, tdf, twd, tws, tn);
+                    break;
+                case OPC_MSA_COPY_U_df:
+                    gen_helper_msa_copy_u_df(cpu_env, tdf, twd, tws, tn);
+                    break;
+                case OPC_MSA_INSERT_df:
+                    gen_helper_msa_insert_df(cpu_env, tdf, twd, tws, tn);
+                    break;
+                }
+                break;
+            default:
+                MIPS_INVAL("MSA instruction");
+                generate_exception(ctx, EXCP_RI);
+            }
+            tcg_temp_free_i32(twd);
+            tcg_temp_free_i32(tws);
+            tcg_temp_free_i32(tn);
+            tcg_temp_free_i32(tdf);
+        }
+    }
+}
+
 static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
 {
     uint32_t opcode = ctx->opcode;
@@ -15349,6 +15482,9 @@ static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
     case OPC_MSA_3R_15:
         gen_msa_3r(env, ctx);
         break;
+    case OPC_MSA_ELM:
+        gen_msa_elm(env, ctx);
+        break;
     default:
         MIPS_INVAL("MSA instruction");
         generate_exception(ctx, EXCP_RI);
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 15/20] target-mips: add MSA 3RF format instructions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (13 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 14/20] target-mips: add MSA ELM " Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 16/20] target-mips: add MSA VEC/2R " Yongbok Kim
                   ` (4 subsequent siblings)
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA 3RF format instructions

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.h     |   41 ++
 target-mips/msa_helper.c | 1575 +++++++++++++++++++++++++++++++++++++++++++++-
 target-mips/translate.c  |  203 ++++++
 3 files changed, 1816 insertions(+), 3 deletions(-)

diff --git a/target-mips/helper.h b/target-mips/helper.h
index e13daec..fec21b6 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -740,6 +740,41 @@ DEF_HELPER_5(msa_dpadd_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_dpadd_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_dpsub_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_dpsub_u_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fadd_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fcaf_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fceq_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fcle_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fclt_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fcne_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fcor_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fcueq_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fcule_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fcult_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fcun_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fcune_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fdiv_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fexdo_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fexp2_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fmadd_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fmax_a_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fmax_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fmin_a_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fmin_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fmsub_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fmul_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fsaf_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fseq_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fsle_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fslt_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fsne_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fsor_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fsub_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fsueq_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fsule_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fsult_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fsun_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_fsune_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ftq_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_hadd_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_hadd_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_hsub_s_df, void, env, i32, i32, i32, i32)
@@ -751,6 +786,8 @@ DEF_HELPER_5(msa_ilvr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_insert_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_insve_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_ldi_df, void, env, i32, i32, i32)
+DEF_HELPER_5(msa_madd_q_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_maddr_q_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_maddv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_max_a_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_max_s_df, void, env, i32, i32, i32, i32)
@@ -765,7 +802,11 @@ DEF_HELPER_5(msa_mini_u_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_mod_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_mod_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_3(msa_move_v, void, env, i32, i32)
+DEF_HELPER_5(msa_msub_q_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_msubr_q_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_msubv_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_mul_q_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_mulr_q_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_mulv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_4(msa_nori_b, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ori_b, void, env, i32, i32, i32)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index 220a0cd..bb4ab66 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -2793,6 +2793,1578 @@ void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+/*
+ * Multiply two signed fixed-point elements of data format df and return
+ * the product shifted right by DF_BITS(df) - 1.  When both inputs equal
+ * DF_MIN_INT(df) the function returns DF_MAX_INT(df) instead.
+ * env is not used.
+ */
+static inline int64_t msa_mul_q_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    int64_t lowest  = DF_MIN_INT(df);
+    int64_t highest = DF_MAX_INT(df);
+    int64_t product;
+
+    if (arg1 != lowest || arg2 != lowest) {
+        product = arg1 * arg2;
+        return product >> (DF_BITS(df) - 1);
+    }
+
+    /* min * min special case: hand back the largest value */
+    return highest;
+}
+
+/*
+ * MUL_Q.df helper: for every element i of data format df, store
+ * msa_mul_q_df(ws[i], wt[i]) into wd[i].  When MSAIR_WRP_BIT is set in
+ * MSAIR, the bit for wd is also set in msamodify.
+ */
+void helper_msa_mul_q_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_mul_q_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* unsigned constant: 1 << 31 on a signed int is undefined */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Rounding fixed-point multiply: 1 << (DF_BITS(df) - 2) is added to the
+ * raw product before it is shifted right by DF_BITS(df) - 1.  When both
+ * inputs equal DF_MIN_INT(df) the function returns DF_MAX_INT(df).
+ * env is not used.
+ */
+static inline int64_t msa_mulr_q_df(CPUMIPSState *env, uint32_t df,
+        int64_t arg1, int64_t arg2)
+{
+    int64_t lowest   = DF_MIN_INT(df);
+    int64_t highest  = DF_MAX_INT(df);
+    int64_t rounding = 1 << (DF_BITS(df) - 2);
+    int64_t product;
+
+    if (arg1 != lowest || arg2 != lowest) {
+        product = arg1 * arg2;
+        return (product + rounding) >> (DF_BITS(df) - 1);
+    }
+
+    /* min * min special case: hand back the largest value */
+    return highest;
+}
+
+/*
+ * MULR_Q.df helper: for every element i of data format df, store
+ * msa_mulr_q_df(ws[i], wt[i]) into wd[i].  When MSAIR_WRP_BIT is set in
+ * MSAIR, the bit for wd is also set in msamodify.
+ */
+void helper_msa_mulr_q_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_mulr_q_df(env, df, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* unsigned constant: 1 << 31 on a signed int is undefined */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Fixed-point multiply-add: dest is shifted left by DF_BITS(df) - 1, the
+ * raw product arg1 * arg2 is added, and the sum is shifted back right by
+ * the same amount.  The result is clamped to
+ * [DF_MIN_INT(df), DF_MAX_INT(df)].  env is not used.
+ */
+static inline int64_t msa_madd_q_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    const int frac_bits = DF_BITS(df) - 1;
+    const int64_t hi = DF_MAX_INT(df);
+    const int64_t lo = DF_MIN_INT(df);
+    int64_t acc;
+
+    acc = (dest << frac_bits) + arg1 * arg2;
+    acc >>= frac_bits;
+
+    if (acc < lo) {
+        return lo;
+    }
+    if (hi < acc) {
+        return hi;
+    }
+    return acc;
+}
+
+/*
+ * MADD_Q.df helper: for every element i of data format df, replace wd[i]
+ * with msa_madd_q_df(wd[i], ws[i], wt[i]).  When MSAIR_WRP_BIT is set in
+ * MSAIR, the bit for wd is also set in msamodify.
+ */
+void helper_msa_madd_q_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_madd_q_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* unsigned constant: 1 << 31 on a signed int is undefined */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Rounding fixed-point multiply-add: like msa_madd_q_df() but
+ * 1 << (DF_BITS(df) - 2) is added to the sum before the final right
+ * shift.  The result is clamped to [DF_MIN_INT(df), DF_MAX_INT(df)].
+ * env is not used.
+ */
+static inline int64_t msa_maddr_q_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    const int frac_bits = DF_BITS(df) - 1;
+    const int64_t hi = DF_MAX_INT(df);
+    const int64_t lo = DF_MIN_INT(df);
+    const int64_t rounding = 1 << (DF_BITS(df) - 2);
+    int64_t acc;
+
+    acc = (dest << frac_bits) + arg1 * arg2 + rounding;
+    acc >>= frac_bits;
+
+    if (acc < lo) {
+        return lo;
+    }
+    if (hi < acc) {
+        return hi;
+    }
+    return acc;
+}
+
+/*
+ * MADDR_Q.df helper: for every element i of data format df, replace wd[i]
+ * with msa_maddr_q_df(wd[i], ws[i], wt[i]).  When MSAIR_WRP_BIT is set in
+ * MSAIR, the bit for wd is also set in msamodify.
+ */
+void helper_msa_maddr_q_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_maddr_q_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* unsigned constant: 1 << 31 on a signed int is undefined */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Fixed-point multiply-subtract: dest is shifted left by DF_BITS(df) - 1,
+ * the raw product arg1 * arg2 is subtracted, and the difference is
+ * shifted back right by the same amount.  The result is clamped to
+ * [DF_MIN_INT(df), DF_MAX_INT(df)].  env is not used.
+ */
+static inline int64_t msa_msub_q_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    const int frac_bits = DF_BITS(df) - 1;
+    const int64_t hi = DF_MAX_INT(df);
+    const int64_t lo = DF_MIN_INT(df);
+    int64_t acc;
+
+    acc = (dest << frac_bits) - arg1 * arg2;
+    acc >>= frac_bits;
+
+    if (acc < lo) {
+        return lo;
+    }
+    if (hi < acc) {
+        return hi;
+    }
+    return acc;
+}
+
+/*
+ * MSUB_Q.df helper: for every element i of data format df, replace wd[i]
+ * with msa_msub_q_df(wd[i], ws[i], wt[i]).  When MSAIR_WRP_BIT is set in
+ * MSAIR, the bit for wd is also set in msamodify.
+ */
+void helper_msa_msub_q_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_msub_q_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* unsigned constant: 1 << 31 on a signed int is undefined */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Rounding fixed-point multiply-subtract: like msa_msub_q_df() but
+ * 1 << (DF_BITS(df) - 2) is added to the difference before the final
+ * right shift.  The result is clamped to
+ * [DF_MIN_INT(df), DF_MAX_INT(df)].  env is not used.
+ */
+static inline int64_t msa_msubr_q_df(CPUMIPSState *env, uint32_t df,
+        int64_t dest, int64_t arg1, int64_t arg2)
+{
+    const int frac_bits = DF_BITS(df) - 1;
+    const int64_t hi = DF_MAX_INT(df);
+    const int64_t lo = DF_MIN_INT(df);
+    const int64_t rounding = 1 << (DF_BITS(df) - 2);
+    int64_t acc;
+
+    acc = (dest << frac_bits) - arg1 * arg2 + rounding;
+    acc >>= frac_bits;
+
+    if (acc < lo) {
+        return lo;
+    }
+    if (hi < acc) {
+        return hi;
+    }
+    return acc;
+}
+
+/*
+ * MSUBR_Q.df helper: for every element i of data format df, replace wd[i]
+ * with msa_msubr_q_df(wd[i], ws[i], wt[i]).  When MSAIR_WRP_BIT is set in
+ * MSAIR, the bit for wd is also set in msamodify.
+ */
+void helper_msa_msubr_q_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    int64_t td, ts, tt;
+    int i;
+    int df_bits = 8 * (1 << df);
+
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        tt = msa_load_wr_elem_s64(env, wt, df, i);
+        td = msa_load_wr_elem_s64(env, wd, df, i);
+        td = msa_msubr_q_df(env, df, td, ts, tt);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* unsigned constant: 1 << 31 on a signed int is undefined */
+        env->active_msa.msamodify |= (1u << wd);
+    }
+}
+
+/*
+ * Per-width NaN templates, built by XOR-ing the softfloat default NaN with
+ * a fixed mask.  The MSA_FLOAT_* macros below clear the low 6 bits of the
+ * template and OR the exception cause bits into it when an enabled
+ * exception is detected.
+ *
+ * The hex value after each define is the XOR result when the default NaN
+ * is 0x7e00 / 0x7fc00000 / 0x7ff8000000000000 respectively; a different
+ * floatN_default_nan yields a different pattern.
+ */
+#define FLOAT_SNAN16 (float16_default_nan ^ 0x0220)
+        /* 0x7c20 */
+#define FLOAT_SNAN32 (float32_default_nan ^ 0x00400020)
+        /* 0x7f800020 */
+#define FLOAT_SNAN64 (float64_default_nan ^ 0x0008000000000020ULL)
+        /* 0x7ff0000000000020 */
+
+/*
+ * Reset the Cause field of MSACSR to 0.  The floating-point helpers call
+ * this before processing the elements of a vector operation.
+ */
+static inline void clear_msacsr_cause(CPUMIPSState *env)
+{
+    SET_FP_CAUSE(env->active_msa.msacsr, 0);
+}
+
+/*
+ * Act on the Cause bits accumulated in MSACSR: if any of them is enabled
+ * (or is FP_UNIMPLEMENTED) raise EXCP_MSAFPE, otherwise fold the Cause
+ * bits into the MSACSR Flags field.
+ */
+static inline void check_msacsr_cause(CPUMIPSState *env)
+{
+    if ((GET_FP_CAUSE(env->active_msa.msacsr) &
+            (GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED)) != 0) {
+        helper_raise_exception(env, EXCP_MSAFPE);
+    } else {
+        UPDATE_FP_FLAGS(env->active_msa.msacsr,
+                GET_FP_CAUSE(env->active_msa.msacsr));
+    }
+}
+
+/*
+ * Action flags for update_msacsr() (may be OR-ed together).
+ *
+ * CLEAR_FS_UNDERFLOW: when an output was flushed to zero with MSACSR.FS
+ *                     set, clear FP_UNDERFLOW instead of setting it.
+ * CLEAR_IS_INEXACT:   when an input was flushed to zero with MSACSR.FS
+ *                     set, clear FP_INEXACT instead of setting it.
+ * RECIPROCAL_INEXACT: when neither FP_INVALID nor FP_DIV0 is present,
+ *                     replace the cause bits with FP_INEXACT alone.
+ */
+#define CLEAR_FS_UNDERFLOW 1
+#define CLEAR_IS_INEXACT   2
+#define RECIPROCAL_INEXACT 4
+
+/*
+ * Convert the softfloat exception flags currently held in
+ * env->active_msa.fp_status into MIPS FP cause bits, apply the MSA
+ * specific adjustments below and merge the result into MSACSR Cause.
+ *
+ * action:   OR of CLEAR_FS_UNDERFLOW, CLEAR_IS_INEXACT, RECIPROCAL_INEXACT
+ * denormal: non-zero forces float_flag_underflow into the flag set
+ *
+ * Returns the adjusted cause bits.  They are NOT masked with the enable
+ * bits; callers do that themselves.  MSACSR Cause is left untouched only
+ * when an enabled exception is present and MSACSR_NX_BIT is set.
+ */
+static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
+{
+    int ieee_ex;
+
+    int c;
+    int cause;
+    int enable;
+
+    ieee_ex = get_float_exception_flags(&env->active_msa.fp_status);
+
+    /* QEMU softfloat does not signal all underflow cases */
+    if (denormal) {
+        ieee_ex |= float_flag_underflow;
+    }
+
+    c = ieee_ex_to_mips(ieee_ex);
+    enable = GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED;
+
+    /* Set Inexact (I) when flushing inputs to zero */
+    if ((ieee_ex & float_flag_input_denormal) &&
+            (env->active_msa.msacsr & MSACSR_FS_BIT) != 0) {
+        if (action & CLEAR_IS_INEXACT) {
+            c &= ~FP_INEXACT;
+        } else {
+            c |=  FP_INEXACT;
+        }
+    }
+
+    /* Set Inexact (I) and Underflow (U) when flushing outputs to zero */
+    if ((ieee_ex & float_flag_output_denormal) &&
+            (env->active_msa.msacsr & MSACSR_FS_BIT) != 0) {
+        c |= FP_INEXACT;
+        if (action & CLEAR_FS_UNDERFLOW) {
+            c &= ~FP_UNDERFLOW;
+        } else {
+            c |=  FP_UNDERFLOW;
+        }
+    }
+
+    /* Set Inexact (I) when Overflow (O) is not enabled */
+    if ((c & FP_OVERFLOW) != 0 && (enable & FP_OVERFLOW) == 0) {
+        c |= FP_INEXACT;
+    }
+
+    /* Clear Exact Underflow when Underflow (U) is not enabled */
+    if ((c & FP_UNDERFLOW) != 0 && (enable & FP_UNDERFLOW) == 0 &&
+            (c & FP_INEXACT) == 0) {
+        c &= ~FP_UNDERFLOW;
+    }
+
+    /* Reciprocal operations set only Inexact when valid and not
+       divide by zero */
+    if ((action & RECIPROCAL_INEXACT) &&
+            (c & (FP_INVALID | FP_DIV0)) == 0) {
+        c = FP_INEXACT;
+    }
+
+    cause = c & enable;    /* all current enabled exceptions */
+
+    if (cause == 0) {
+        /* No enabled exception: OR all current exceptions (c) into
+           the MSACSR Cause field */
+        SET_FP_CAUSE(env->active_msa.msacsr,
+                (GET_FP_CAUSE(env->active_msa.msacsr) | c));
+    } else {
+        /* At least one current exception is enabled */
+        if ((env->active_msa.msacsr & MSACSR_NX_BIT) == 0) {
+            /* NX clear, exception(s) will trap: OR all current
+               exceptions (c, not just the enabled subset) into the
+               MSACSR Cause field */
+            SET_FP_CAUSE(env->active_msa.msacsr,
+                    (GET_FP_CAUSE(env->active_msa.msacsr) | c));
+        }
+    }
+
+    return c;
+}
+
+/*
+ * Stubs for the 16-bit format: both expand to 0 for any argument, so
+ * IS_DENORMAL(x, 16) is always false.
+ */
+#define float16_is_zero(ARG) 0
+#define float16_is_zero_or_denormal(ARG) 0
+
+/*
+ * True when ARG is not zero but floatBITS_is_zero_or_denormal() still
+ * accepts it.  ARG is evaluated twice.
+ */
+#define IS_DENORMAL(ARG, BITS)                      \
+    (!float ## BITS ## _is_zero(ARG)                \
+    && float ## BITS ## _is_zero_or_denormal(ARG))
+
+/*
+ * Unary operation whose result width differs from the operation width:
+ * DEST = floatBITS_OP(ARG), using env->active_msa.fp_status.  The
+ * softfloat flags are cleared first and then folded into MSACSR through
+ * update_msacsr() with CLEAR_FS_UNDERFLOW.  If an enabled exception
+ * results, DEST is overwritten with the FLOAT_SNAN<XBITS> template (low
+ * 6 bits cleared) OR-ed with the cause bits.
+ * Requires a variable named env at the expansion site.
+ */
+#define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
+    do {                                                                    \
+        int c;                                                              \
+        int cause;                                                          \
+        int enable;                                                         \
+                                                                            \
+        set_float_exception_flags(0, &env->active_msa.fp_status);           \
+        DEST = float ## BITS ## _ ## OP(ARG, &env->active_msa.fp_status);   \
+        c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
+        enable = GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED;  \
+        cause = c & enable;                                                 \
+                                                                            \
+        if (cause) {                                                        \
+            DEST = ((FLOAT_SNAN ## XBITS >> 6) << 6) | c;                   \
+        }                                                                   \
+    } while (0)
+
+/*
+ * Unary operation: DEST = floatBITS_OP(ARG), using
+ * env->active_msa.fp_status.  The softfloat flags are cleared first and
+ * then folded into MSACSR through update_msacsr(), with the denormal
+ * hint taken from IS_DENORMAL(DEST, BITS).  If an enabled exception
+ * results, DEST is overwritten with the FLOAT_SNAN<BITS> template (low
+ * 6 bits cleared) OR-ed with the cause bits.
+ * Requires a variable named env at the expansion site.
+ */
+#define MSA_FLOAT_UNOP(DEST, OP, ARG, BITS)                                 \
+    do {                                                                    \
+        int c;                                                              \
+        int cause;                                                          \
+        int enable;                                                         \
+                                                                            \
+        set_float_exception_flags(0, &env->active_msa.fp_status);           \
+        DEST = float ## BITS ## _ ## OP(ARG, &env->active_msa.fp_status);   \
+        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
+        enable = GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED;  \
+        cause = c & enable;                                                 \
+                                                                            \
+        if (cause) {                                                        \
+            DEST = ((FLOAT_SNAN ## BITS >> 6) << 6) | c;                    \
+        }                                                                   \
+    } while (0)
+
+/*
+ * Binary operation: DEST = floatBITS_OP(ARG1, ARG2), using
+ * env->active_msa.fp_status.  The softfloat flags are cleared first and
+ * then folded into MSACSR through update_msacsr(), with the denormal
+ * hint taken from IS_DENORMAL(DEST, BITS).  If an enabled exception
+ * results, DEST is overwritten with the FLOAT_SNAN<BITS> template (low
+ * 6 bits cleared) OR-ed with the cause bits.
+ * Requires a variable named env at the expansion site.
+ */
+#define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
+    do {                                                                    \
+        int c;                                                              \
+        int cause;                                                          \
+        int enable;                                                         \
+                                                                            \
+        set_float_exception_flags(0, &env->active_msa.fp_status);           \
+        DEST = float ## BITS ## _ ## OP(ARG1, ARG2,                         \
+                                        &env->active_msa.fp_status);        \
+        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
+        enable = GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED;  \
+        cause = c & enable;                                                 \
+                                                                            \
+        if (cause) {                                                        \
+            DEST = ((FLOAT_SNAN ## BITS >> 6) << 6) | c;                    \
+        }                                                                   \
+    } while (0)
+
+/*
+ * Same shape as MSA_FLOAT_BINOP, except that update_msacsr() is always
+ * called with a denormal hint of 0 (the result is never inspected with
+ * IS_DENORMAL).  Used by the max/min helpers.
+ * Requires a variable named env at the expansion site.
+ */
+#define MSA_FLOAT_MAXOP(DEST, OP, ARG1, ARG2, BITS)                         \
+    do {                                                                    \
+        int c;                                                              \
+        int cause;                                                          \
+        int enable;                                                         \
+                                                                            \
+        set_float_exception_flags(0, &env->active_msa.fp_status);           \
+        DEST = float ## BITS ## _ ## OP(ARG1, ARG2,                         \
+                                        &env->active_msa.fp_status);        \
+        c = update_msacsr(env, 0, 0);                                       \
+        enable = GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED;  \
+        cause = c & enable;                                                 \
+                                                                            \
+        if (cause) {                                                        \
+            DEST = ((FLOAT_SNAN ## BITS >> 6) << 6) | c;                    \
+        }                                                                   \
+    } while (0)
+
+/*
+ * Fused multiply-add: DEST = floatBITS_muladd(ARG2, ARG3, ARG1, NEGATE).
+ * Note the argument order: ARG1 is handed to floatBITS_muladd() as its
+ * third operand, ARG2 and ARG3 as its first two.  NEGATE is the softfloat
+ * muladd flags word.  Exception handling is the same as in
+ * MSA_FLOAT_BINOP.
+ * Requires a variable named env at the expansion site.
+ */
+#define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
+    do {                                                                    \
+        int c;                                                              \
+        int cause;                                                          \
+        int enable;                                                         \
+                                                                            \
+        set_float_exception_flags(0, &env->active_msa.fp_status);           \
+        DEST = float ## BITS ## _muladd(ARG2, ARG3, ARG1, NEGATE,           \
+                                        &env->active_msa.fp_status);        \
+        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
+        enable = GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED;  \
+        cause = c & enable;                                                 \
+                                                                            \
+        if (cause) {                                                        \
+            DEST = ((FLOAT_SNAN ## BITS >> 6) << 6) | c;                    \
+        }                                                                   \
+    } while (0)
+
+/*
+ * True when ARG1 is not a NaN of any kind and ARG2 is a quiet NaN.
+ * The whole expansion is parenthesized so that the macro can be negated
+ * or combined with other operators without changing its meaning.
+ */
+#define NUMBER_QNAN_PAIR(ARG1, ARG2, BITS)      \
+    (!float ## BITS ## _is_any_nan(ARG1)        \
+     && float ## BITS ## _is_quiet_nan(ARG2))
+
+/*
+ * FADD.df helper: element-wise floatN_add of ws and wt for DF_WORD
+ * (32-bit) and DF_DOUBLE (64-bit) elements.  Results are collected in a
+ * temporary register and copied to wd only after check_msacsr_cause()
+ * has returned.  Any other df hits assert(0).
+ */
+void helper_msa_fadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t tmp, *ptmp = &tmp;
+    void *pd = &(env->active_fpu.fpr[wd]);
+    void *ps = &(env->active_fpu.fpr[ws]);
+    void *pt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_BINOP(W(ptmp, i), add, W(ps, i), W(pt, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_BINOP(D(ptmp, i), add, D(ps, i), D(pt, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(pd, ptmp);
+}
+
+/*
+ * FSUB.df helper: element-wise floatN_sub(ws, wt) for DF_WORD (32-bit)
+ * and DF_DOUBLE (64-bit) elements.  Results are collected in a temporary
+ * register and copied to wd only after check_msacsr_cause() has
+ * returned.  Any other df hits assert(0).
+ */
+void helper_msa_fsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t tmp, *ptmp = &tmp;
+    void *pd = &(env->active_fpu.fpr[wd]);
+    void *ps = &(env->active_fpu.fpr[ws]);
+    void *pt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_BINOP(W(ptmp, i), sub, W(ps, i), W(pt, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_BINOP(D(ptmp, i), sub, D(ps, i), D(pt, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(pd, ptmp);
+}
+
+/*
+ * FMUL.df helper: element-wise floatN_mul of ws and wt for DF_WORD
+ * (32-bit) and DF_DOUBLE (64-bit) elements.  Results are collected in a
+ * temporary register and copied to wd only after check_msacsr_cause()
+ * has returned.  Any other df hits assert(0).
+ */
+void helper_msa_fmul_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t tmp, *ptmp = &tmp;
+    void *pd = &(env->active_fpu.fpr[wd]);
+    void *ps = &(env->active_fpu.fpr[ws]);
+    void *pt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_BINOP(W(ptmp, i), mul, W(ps, i), W(pt, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_BINOP(D(ptmp, i), mul, D(ps, i), D(pt, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(pd, ptmp);
+}
+
+/*
+ * FDIV.df helper: element-wise floatN_div(ws, wt) for DF_WORD (32-bit)
+ * and DF_DOUBLE (64-bit) elements.  Results are collected in a temporary
+ * register and copied to wd only after check_msacsr_cause() has
+ * returned.  Any other df hits assert(0).
+ */
+void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t tmp, *ptmp = &tmp;
+    void *pd = &(env->active_fpu.fpr[wd]);
+    void *ps = &(env->active_fpu.fpr[ws]);
+    void *pt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_BINOP(W(ptmp, i), div, W(ps, i), W(pt, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_BINOP(D(ptmp, i), div, D(ps, i), D(pt, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(pd, ptmp);
+}
+
+/*
+ * FEXP2.df helper: element-wise floatN_scalbn(ws, n), where n is the wt
+ * element clamped to [-0x200, 0x200] for DF_WORD and [-0x1000, 0x1000]
+ * for DF_DOUBLE.  Results are collected in a temporary register and
+ * copied to wd only after check_msacsr_cause() has returned.  Any other
+ * df hits assert(0).
+ */
+void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t tmp, *ptmp = &tmp;
+    void *pd = &(env->active_fpu.fpr[wd]);
+    void *ps = &(env->active_fpu.fpr[ws]);
+    void *pt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_BINOP(W(ptmp, i), scalbn, W(ps, i),
+                            W(pt, i) >  0x200 ?  0x200 :
+                            W(pt, i) < -0x200 ? -0x200 : W(pt, i),
+                            32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_BINOP(D(ptmp, i), scalbn, D(ps, i),
+                            D(pt, i) >  0x1000 ?  0x1000 :
+                            D(pt, i) < -0x1000 ? -0x1000 : D(pt, i),
+                            64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(pd, ptmp);
+}
+
+/*
+ * FMADD.df helper: per element, calls floatN_muladd() with ws and wt as
+ * the first two operands, the current wd element as the third operand
+ * and a flags word of 0.  Handles DF_WORD (32-bit) and DF_DOUBLE
+ * (64-bit).  Results are collected in a temporary register and copied to
+ * wd only after check_msacsr_cause() has returned.  Any other df hits
+ * assert(0).
+ */
+void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t tmp, *ptmp = &tmp;
+    void *pd = &(env->active_fpu.fpr[wd]);
+    void *ps = &(env->active_fpu.fpr[ws]);
+    void *pt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_MULADD(W(ptmp, i), W(pd, i),
+                             W(ps, i), W(pt, i), 0, 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_MULADD(D(ptmp, i), D(pd, i),
+                             D(ps, i), D(pt, i), 0, 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(pd, ptmp);
+}
+
+/*
+ * FMSUB.df helper: same as helper_msa_fmadd_df() except that
+ * floatN_muladd() is called with float_muladd_negate_product as its
+ * flags word.  Handles DF_WORD (32-bit) and DF_DOUBLE (64-bit).  Results
+ * are collected in a temporary register and copied to wd only after
+ * check_msacsr_cause() has returned.  Any other df hits assert(0).
+ */
+void helper_msa_fmsub_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t tmp, *ptmp = &tmp;
+    void *pd = &(env->active_fpu.fpr[wd]);
+    void *ps = &(env->active_fpu.fpr[ws]);
+    void *pt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_MULADD(W(ptmp, i), W(pd, i),
+                             W(ps, i), W(pt, i),
+                             float_muladd_negate_product, 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_MULADD(D(ptmp, i), D(pd, i),
+                             D(ps, i), D(pt, i),
+                             float_muladd_negate_product, 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(pd, ptmp);
+}
+
+#define FMAXMIN_A(F, G, X, _S, _T, BITS)                            \
+{                                                                   \
+    uint## BITS ##_t S = _S, T = _T;                                \
+    if (NUMBER_QNAN_PAIR(S, T, BITS)) {                             \
+        T = S;                                                      \
+    }                                                               \
+    else if (NUMBER_QNAN_PAIR(T, S, BITS)) {                        \
+        S = T;                                                      \
+    }                                                               \
+    uint## BITS ##_t as = float## BITS ##_abs(S);                   \
+    uint## BITS ##_t at = float## BITS ##_abs(T);                   \
+    uint## BITS ##_t xs, xt, xd;                                    \
+    MSA_FLOAT_MAXOP(xs, F,  S,  T, BITS);                           \
+    MSA_FLOAT_MAXOP(xt, G,  S,  T, BITS);                           \
+    MSA_FLOAT_MAXOP(xd, F, as, at, BITS);                           \
+    X = (as == at || xd == float## BITS ##_abs(xs)) ? xs : xt;      \
+}
+
+/*
+ * FMAX_A.df helper: applies FMAXMIN_A(max, min, ...) to every pair of
+ * ws/wt elements for DF_WORD (32-bit) and DF_DOUBLE (64-bit).  Results
+ * are collected in a temporary register and copied to wd only after
+ * check_msacsr_cause() has returned.  Any other df hits assert(0).
+ */
+void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t tmp, *ptmp = &tmp;
+    void *pd = &(env->active_fpu.fpr[wd]);
+    void *ps = &(env->active_fpu.fpr[ws]);
+    void *pt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            FMAXMIN_A(max, min, W(ptmp, i), W(ps, i), W(pt, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            FMAXMIN_A(max, min, D(ptmp, i), D(ps, i), D(pt, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(pd, ptmp);
+}
+
+/*
+ * FMAX.df helper: element-wise floatN_max of ws and wt for DF_WORD
+ * (32-bit) and DF_DOUBLE (64-bit).  When exactly one operand is a quiet
+ * NaN and the other is not a NaN, the non-NaN operand is passed for both
+ * sides of the max.  Results are collected in a temporary register and
+ * copied to wd only after check_msacsr_cause() has returned.  Any other
+ * df hits assert(0).
+ */
+void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t tmp, *ptmp = &tmp;
+    void *pd = &(env->active_fpu.fpr[wd]);
+    void *ps = &(env->active_fpu.fpr[ws]);
+    void *pt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            if (NUMBER_QNAN_PAIR(W(ps, i), W(pt, i), 32)) {
+                MSA_FLOAT_MAXOP(W(ptmp, i), max, W(ps, i), W(ps, i), 32);
+            } else if (NUMBER_QNAN_PAIR(W(pt, i), W(ps, i), 32)) {
+                MSA_FLOAT_MAXOP(W(ptmp, i), max, W(pt, i), W(pt, i), 32);
+            } else {
+                MSA_FLOAT_MAXOP(W(ptmp, i), max, W(ps, i), W(pt, i), 32);
+            }
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            if (NUMBER_QNAN_PAIR(D(ps, i), D(pt, i), 64)) {
+                MSA_FLOAT_MAXOP(D(ptmp, i), max, D(ps, i), D(ps, i), 64);
+            } else if (NUMBER_QNAN_PAIR(D(pt, i), D(ps, i), 64)) {
+                MSA_FLOAT_MAXOP(D(ptmp, i), max, D(pt, i), D(pt, i), 64);
+            } else {
+                MSA_FLOAT_MAXOP(D(ptmp, i), max, D(ps, i), D(pt, i), 64);
+            }
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(pd, ptmp);
+}
+
+/*
+ * FMIN_A.df helper: applies FMAXMIN_A(min, max, ...) to every pair of
+ * ws/wt elements for DF_WORD (32-bit) and DF_DOUBLE (64-bit).  Results
+ * are collected in a temporary register and copied to wd only after
+ * check_msacsr_cause() has returned.  Any other df hits assert(0).
+ */
+void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t tmp, *ptmp = &tmp;
+    void *pd = &(env->active_fpu.fpr[wd]);
+    void *ps = &(env->active_fpu.fpr[ws]);
+    void *pt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            FMAXMIN_A(min, max, W(ptmp, i), W(ps, i), W(pt, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            FMAXMIN_A(min, max, D(ptmp, i), D(ps, i), D(pt, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(pd, ptmp);
+}
+
+/*
+ * FMIN.df helper: element-wise floatN_min of ws and wt for DF_WORD
+ * (32-bit) and DF_DOUBLE (64-bit).  When exactly one operand is a quiet
+ * NaN and the other is not a NaN, the non-NaN operand is passed for both
+ * sides of the min.  Results are collected in a temporary register and
+ * copied to wd only after check_msacsr_cause() has returned.  Any other
+ * df hits assert(0).
+ */
+void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t tmp, *ptmp = &tmp;
+    void *pd = &(env->active_fpu.fpr[wd]);
+    void *ps = &(env->active_fpu.fpr[ws]);
+    void *pt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            if (NUMBER_QNAN_PAIR(W(ps, i), W(pt, i), 32)) {
+                MSA_FLOAT_MAXOP(W(ptmp, i), min, W(ps, i), W(ps, i), 32);
+            } else if (NUMBER_QNAN_PAIR(W(pt, i), W(ps, i), 32)) {
+                MSA_FLOAT_MAXOP(W(ptmp, i), min, W(pt, i), W(pt, i), 32);
+            } else {
+                MSA_FLOAT_MAXOP(W(ptmp, i), min, W(ps, i), W(pt, i), 32);
+            }
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            if (NUMBER_QNAN_PAIR(D(ps, i), D(pt, i), 64)) {
+                MSA_FLOAT_MAXOP(D(ptmp, i), min, D(ps, i), D(ps, i), 64);
+            } else if (NUMBER_QNAN_PAIR(D(pt, i), D(ps, i), 64)) {
+                MSA_FLOAT_MAXOP(D(ptmp, i), min, D(pt, i), D(pt, i), 64);
+            } else {
+                MSA_FLOAT_MAXOP(D(ptmp, i), min, D(ps, i), D(pt, i), 64);
+            }
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(pd, ptmp);
+}
+
+/*
+ * Floating-point comparison producing an element mask.
+ *
+ * Calls floatBITS_OP(ARG1, ARG2) when QUIET is zero and
+ * floatBITS_OP_quiet(ARG1, ARG2) otherwise.  DEST becomes
+ * M_MAX_UINT(BITS) when the comparison is true and 0 when it is false.
+ * The softfloat flags are cleared first and then folded into MSACSR via
+ * update_msacsr() with CLEAR_IS_INEXACT.  If an enabled exception
+ * results, DEST is overwritten with the FLOAT_SNAN<BITS> template (low
+ * 6 bits cleared) OR-ed with the cause bits.
+ * Requires a variable named env at the expansion site.
+ */
+#define MSA_FLOAT_COND(DEST, OP, ARG1, ARG2, BITS, QUIET)                   \
+    do {                                                                    \
+        int c;                                                              \
+        int cause;                                                          \
+        int enable;                                                         \
+        int64_t cond;                                                       \
+        set_float_exception_flags(0, &env->active_msa.fp_status);           \
+        if (!QUIET) {                                                       \
+            cond = float ## BITS ## _ ## OP(ARG1, ARG2,                     \
+                                          &env->active_msa.fp_status);      \
+        } else {                                                            \
+            cond = float ## BITS ## _ ## OP ## _quiet(ARG1, ARG2,           \
+                                               &env->active_msa.fp_status); \
+        }                                                                   \
+        DEST = cond ? M_MAX_UINT(BITS) : 0;                                 \
+        c = update_msacsr(env, CLEAR_IS_INEXACT, 0);                        \
+        enable = GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED;  \
+        cause = c & enable;                                                 \
+                                                                            \
+        if (cause) {                                                        \
+            DEST = ((FLOAT_SNAN ## BITS >> 6) << 6) | c;                    \
+        }                                                                   \
+    } while (0)
+
+/*
+ * "Always false" comparison: runs the eq comparison through
+ * MSA_FLOAT_COND (so MSACSR is still updated), then turns an all-ones
+ * result into 0.  DEST therefore ends up 0 unless MSA_FLOAT_COND stored
+ * its enabled-exception pattern.
+ */
+#define MSA_FLOAT_AF(DEST, ARG1, ARG2, BITS, QUIET)                 \
+    do {                                                            \
+        MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);          \
+        if ((DEST & M_MAX_UINT(BITS)) == M_MAX_UINT(BITS)) {        \
+            DEST = 0;                                               \
+        }                                                           \
+    } while (0)
+
+/*
+ * Unordered-or-equal: evaluates the unordered comparison first and, only
+ * if DEST is 0 afterwards, the eq comparison.  Each step goes through
+ * MSA_FLOAT_COND and updates MSACSR separately.
+ */
+#define MSA_FLOAT_UEQ(DEST, ARG1, ARG2, BITS, QUIET)                \
+    do {                                                            \
+        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
+        if (DEST == 0) {                                            \
+            MSA_FLOAT_COND(DEST, eq, ARG1, ARG2, BITS, QUIET);      \
+        }                                                           \
+    } while (0)
+
+/*
+ * "Not equal", built from two lt tests: ARG1 < ARG2 first, and only if that
+ * left DEST == 0, ARG2 < ARG1.
+ */
+#define MSA_FLOAT_NE(DEST, ARG1, ARG2, BITS, QUIET)                 \
+    do {                                                            \
+        MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);          \
+        if (DEST == 0) {                                            \
+            MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);      \
+        }                                                           \
+    } while (0)
+
+/*
+ * "Unordered or not equal": try the unordered test, then ARG1 < ARG2, then
+ * ARG2 < ARG1.  Each later test runs only while DEST is still 0.
+ */
+#define MSA_FLOAT_UNE(DEST, ARG1, ARG2, BITS, QUIET)                \
+    do {                                                            \
+        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
+        if (DEST == 0) {                                            \
+            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
+            if (DEST == 0) {                                        \
+                MSA_FLOAT_COND(DEST, lt, ARG2, ARG1, BITS, QUIET);  \
+            }                                                       \
+        }                                                           \
+    } while (0)
+
+/*
+ * "Unordered or less-or-equal": evaluate the unordered test into DEST, and
+ * only if that left DEST == 0 evaluate the le test into DEST.
+ */
+#define MSA_FLOAT_ULE(DEST, ARG1, ARG2, BITS, QUIET)                \
+    do {                                                            \
+        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
+        if (DEST == 0) {                                            \
+            MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);      \
+        }                                                           \
+    } while (0)
+
+/*
+ * "Unordered or less-than": evaluate the unordered test into DEST, and only
+ * if that left DEST == 0 evaluate the lt test into DEST.
+ */
+#define MSA_FLOAT_ULT(DEST, ARG1, ARG2, BITS, QUIET)                \
+    do {                                                            \
+        MSA_FLOAT_COND(DEST, unordered, ARG1, ARG2, BITS, QUIET);   \
+        if (DEST == 0) {                                            \
+            MSA_FLOAT_COND(DEST, lt, ARG1, ARG2, BITS, QUIET);      \
+        }                                                           \
+    } while (0)
+
+/*
+ * "Ordered", built from two le tests: ARG1 <= ARG2 first, and only if that
+ * left DEST == 0, ARG2 <= ARG1.
+ */
+#define MSA_FLOAT_OR(DEST, ARG1, ARG2, BITS, QUIET)                 \
+    do {                                                            \
+        MSA_FLOAT_COND(DEST, le, ARG1, ARG2, BITS, QUIET);          \
+        if (DEST == 0) {                                            \
+            MSA_FLOAT_COND(DEST, le, ARG2, ARG1, BITS, QUIET);      \
+        }                                                           \
+    } while (0)
+
+/*
+ * Element-wise AF compare of the vectors at pws and pwt for data format df
+ * (word or double); quiet is forwarded to MSA_FLOAT_AF.  The result is
+ * assembled in a local temporary and copied to pwd after
+ * check_msacsr_cause() has run.
+ */
+static inline void compare_af(CPUMIPSState *env, void *pwd, void *pws,
+        void *pwt, uint32_t df, int quiet)
+{
+    wr_t result;
+    wr_t *pres = &result;
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_AF(W(pres, i), W(pws, i), W(pwt, i), 32, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_AF(D(pres, i), D(pws, i), D(pwt, i), 64, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pres);
+}
+
+/* AF compare of registers ws and wt into wd, with quiet = 1. */
+void helper_msa_fcaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_af(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 1);
+}
+
+/* AF compare of registers ws and wt into wd, with quiet = 0. */
+void helper_msa_fsaf_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_af(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 0);
+}
+
+/*
+ * Element-wise eq compare of the vectors at pws and pwt for data format df
+ * (word or double); quiet is forwarded to MSA_FLOAT_COND.  The result is
+ * assembled in a local temporary and copied to pwd after
+ * check_msacsr_cause() has run.
+ */
+static inline void compare_eq(CPUMIPSState *env, void *pwd, void *pws,
+        void *pwt, uint32_t df, int quiet)
+{
+    wr_t result;
+    wr_t *pres = &result;
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_COND(W(pres, i), eq, W(pws, i), W(pwt, i), 32, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_COND(D(pres, i), eq, D(pws, i), D(pwt, i), 64, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pres);
+}
+
+/* eq compare of registers ws and wt into wd, with quiet = 1. */
+void helper_msa_fceq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_eq(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 1);
+}
+
+/* eq compare of registers ws and wt into wd, with quiet = 0. */
+void helper_msa_fseq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_eq(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 0);
+}
+
+/*
+ * Element-wise UEQ compare of the vectors at pws and pwt for data format df
+ * (word or double); quiet is forwarded to MSA_FLOAT_UEQ.  The result is
+ * assembled in a local temporary and copied to pwd after
+ * check_msacsr_cause() has run.
+ */
+static inline void compare_ueq(CPUMIPSState *env, void *pwd, void *pws,
+        void *pwt, uint32_t df, int quiet)
+{
+    wr_t result;
+    wr_t *pres = &result;
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UEQ(W(pres, i), W(pws, i), W(pwt, i), 32, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UEQ(D(pres, i), D(pws, i), D(pwt, i), 64, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pres);
+}
+
+/* UEQ compare of registers ws and wt into wd, with quiet = 1. */
+void helper_msa_fcueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_ueq(env,
+                &env->active_fpu.fpr[wd],
+                &env->active_fpu.fpr[ws],
+                &env->active_fpu.fpr[wt],
+                df, 1);
+}
+
+/* UEQ compare of registers ws and wt into wd, with quiet = 0. */
+void helper_msa_fsueq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_ueq(env,
+                &env->active_fpu.fpr[wd],
+                &env->active_fpu.fpr[ws],
+                &env->active_fpu.fpr[wt],
+                df, 0);
+}
+
+/*
+ * Element-wise NE compare of the vectors at pws and pwt for data format df
+ * (word or double); quiet is forwarded to MSA_FLOAT_NE.  The result is
+ * assembled in a local temporary and copied to pwd after
+ * check_msacsr_cause() has run.
+ */
+static inline void compare_ne(CPUMIPSState *env, void *pwd, void *pws,
+        void *pwt, uint32_t df, int quiet)
+{
+    wr_t result;
+    wr_t *pres = &result;
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_NE(W(pres, i), W(pws, i), W(pwt, i), 32, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_NE(D(pres, i), D(pws, i), D(pwt, i), 64, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pres);
+}
+
+/* NE compare of registers ws and wt into wd, with quiet = 1. */
+void helper_msa_fcne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_ne(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 1);
+}
+
+/* NE compare of registers ws and wt into wd, with quiet = 0. */
+void helper_msa_fsne_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_ne(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 0);
+}
+
+/*
+ * Element-wise UNE compare of the vectors at pws and pwt for data format df
+ * (word or double); quiet is forwarded to MSA_FLOAT_UNE.  The result is
+ * assembled in a local temporary and copied to pwd after
+ * check_msacsr_cause() has run.
+ */
+static inline void compare_une(CPUMIPSState *env, void *pwd, void *pws,
+        void *pwt, uint32_t df, int quiet)
+{
+    wr_t result;
+    wr_t *pres = &result;
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNE(W(pres, i), W(pws, i), W(pwt, i), 32, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNE(D(pres, i), D(pws, i), D(pwt, i), 64, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pres);
+}
+
+/* UNE compare of registers ws and wt into wd, with quiet = 1. */
+void helper_msa_fcune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_une(env,
+                &env->active_fpu.fpr[wd],
+                &env->active_fpu.fpr[ws],
+                &env->active_fpu.fpr[wt],
+                df, 1);
+}
+
+/* UNE compare of registers ws and wt into wd, with quiet = 0. */
+void helper_msa_fsune_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_une(env,
+                &env->active_fpu.fpr[wd],
+                &env->active_fpu.fpr[ws],
+                &env->active_fpu.fpr[wt],
+                df, 0);
+}
+
+/*
+ * Element-wise le compare of the vectors at pws and pwt for data format df
+ * (word or double); quiet is forwarded to MSA_FLOAT_COND.  The result is
+ * assembled in a local temporary and copied to pwd after
+ * check_msacsr_cause() has run.
+ */
+static inline void compare_le(CPUMIPSState *env, void *pwd, void *pws,
+        void *pwt, uint32_t df, int quiet)
+{
+    wr_t result;
+    wr_t *pres = &result;
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_COND(W(pres, i), le, W(pws, i), W(pwt, i), 32, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_COND(D(pres, i), le, D(pws, i), D(pwt, i), 64, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pres);
+}
+
+/* le compare of registers ws and wt into wd, with quiet = 1. */
+void helper_msa_fcle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_le(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 1);
+}
+
+/* le compare of registers ws and wt into wd, with quiet = 0. */
+void helper_msa_fsle_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_le(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 0);
+}
+
+/*
+ * Element-wise ULE compare of the vectors at pws and pwt for data format df
+ * (word or double); quiet is forwarded to MSA_FLOAT_ULE.  The result is
+ * assembled in a local temporary and copied to pwd after
+ * check_msacsr_cause() has run.
+ */
+static inline void compare_ule(CPUMIPSState *env, void *pwd, void *pws,
+        void *pwt, uint32_t df, int quiet)
+{
+    wr_t result;
+    wr_t *pres = &result;
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_ULE(W(pres, i), W(pws, i), W(pwt, i), 32, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_ULE(D(pres, i), D(pws, i), D(pwt, i), 64, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pres);
+}
+
+/* ULE compare of registers ws and wt into wd, with quiet = 1. */
+void helper_msa_fcule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_ule(env,
+                &env->active_fpu.fpr[wd],
+                &env->active_fpu.fpr[ws],
+                &env->active_fpu.fpr[wt],
+                df, 1);
+}
+
+/* ULE compare of registers ws and wt into wd, with quiet = 0. */
+void helper_msa_fsule_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_ule(env,
+                &env->active_fpu.fpr[wd],
+                &env->active_fpu.fpr[ws],
+                &env->active_fpu.fpr[wt],
+                df, 0);
+}
+
+/*
+ * Element-wise lt compare of the vectors at pws and pwt for data format df
+ * (word or double); quiet is forwarded to MSA_FLOAT_COND.  The result is
+ * assembled in a local temporary and copied to pwd after
+ * check_msacsr_cause() has run.
+ */
+static inline void compare_lt(CPUMIPSState *env, void *pwd, void *pws,
+        void *pwt, uint32_t df, int quiet)
+{
+    wr_t result;
+    wr_t *pres = &result;
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_COND(W(pres, i), lt, W(pws, i), W(pwt, i), 32, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_COND(D(pres, i), lt, D(pws, i), D(pwt, i), 64, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pres);
+}
+
+/* lt compare of registers ws and wt into wd, with quiet = 1. */
+void helper_msa_fclt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_lt(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 1);
+}
+
+/* lt compare of registers ws and wt into wd, with quiet = 0. */
+void helper_msa_fslt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_lt(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 0);
+}
+
+/*
+ * Element-wise ULT compare of the vectors at pws and pwt for data format df
+ * (word or double); quiet is forwarded to MSA_FLOAT_ULT.  The result is
+ * assembled in a local temporary and copied to pwd after
+ * check_msacsr_cause() has run.
+ */
+static inline void compare_ult(CPUMIPSState *env, void *pwd, void *pws,
+        void *pwt, uint32_t df, int quiet)
+{
+    wr_t result;
+    wr_t *pres = &result;
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_ULT(W(pres, i), W(pws, i), W(pwt, i), 32, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_ULT(D(pres, i), D(pws, i), D(pwt, i), 64, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pres);
+}
+
+/* ULT compare of registers ws and wt into wd, with quiet = 1. */
+void helper_msa_fcult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_ult(env,
+                &env->active_fpu.fpr[wd],
+                &env->active_fpu.fpr[ws],
+                &env->active_fpu.fpr[wt],
+                df, 1);
+}
+
+/* ULT compare of registers ws and wt into wd, with quiet = 0. */
+void helper_msa_fsult_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_ult(env,
+                &env->active_fpu.fpr[wd],
+                &env->active_fpu.fpr[ws],
+                &env->active_fpu.fpr[wt],
+                df, 0);
+}
+
+/*
+ * Element-wise unordered compare of the vectors at pws and pwt for data
+ * format df (word or double); quiet is forwarded to MSA_FLOAT_COND.  The
+ * result is assembled in a local temporary and copied to pwd after
+ * check_msacsr_cause() has run.
+ */
+static inline void compare_un(CPUMIPSState *env, void *pwd, void *pws,
+        void *pwt, uint32_t df, int quiet)
+{
+    wr_t result;
+    wr_t *pres = &result;
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_COND(W(pres, i), unordered, W(pws, i), W(pwt, i),
+                    32, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_COND(D(pres, i), unordered, D(pws, i), D(pwt, i),
+                    64, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pres);
+}
+
+/* Unordered compare of registers ws and wt into wd, with quiet = 1. */
+void helper_msa_fcun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_un(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 1);
+}
+
+/* Unordered compare of registers ws and wt into wd, with quiet = 0. */
+void helper_msa_fsun_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_un(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 0);
+}
+
+/*
+ * Element-wise OR (ordered) compare of the vectors at pws and pwt for data
+ * format df (word or double); quiet is forwarded to MSA_FLOAT_OR.  The
+ * result is assembled in a local temporary and copied to pwd after
+ * check_msacsr_cause() has run.
+ */
+static inline void compare_or(CPUMIPSState *env, void *pwd, void *pws,
+        void *pwt, uint32_t df, int quiet)
+{
+    wr_t result;
+    wr_t *pres = &result;
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_OR(W(pres, i), W(pws, i), W(pwt, i), 32, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_OR(D(pres, i), D(pws, i), D(pwt, i), 64, quiet);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pres);
+}
+
+/* OR (ordered) compare of registers ws and wt into wd, with quiet = 1. */
+void helper_msa_fcor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_or(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 1);
+}
+
+/* OR (ordered) compare of registers ws and wt into wd, with quiet = 0. */
+void helper_msa_fsor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    compare_or(env,
+               &env->active_fpu.fpr[wd],
+               &env->active_fpu.fpr[ws],
+               &env->active_fpu.fpr[wt],
+               df, 0);
+}
+
+/*
+ * Narrow a (passed as int32 and cast to float32) to half precision with
+ * float32_to_float16(), run the result through
+ * float16_maybe_silence_nan(), and set bit 15 of the result when a is
+ * negative as a signed integer.
+ */
+static inline float16 float16_from_float32(int32 a, flag ieee STATUS_PARAM)
+{
+    float16 half = float32_to_float16((float32)a, ieee  STATUS_VAR);
+
+    half = float16_maybe_silence_nan(half);
+    if (a < 0) {
+        return half | (1 << 15);
+    }
+    return half;
+}
+
+/*
+ * Narrow a (passed as int64 and cast to float64) to single precision with
+ * float64_to_float32(), run the result through
+ * float32_maybe_silence_nan(), and set bit 31 of the result when a is
+ * negative as a signed integer.
+ */
+static inline float32 float32_from_float64(int64 a STATUS_PARAM)
+{
+    float32 f_val;
+
+    f_val = float64_to_float32((float64)a STATUS_VAR);
+    f_val = float32_maybe_silence_nan(f_val);
+
+    /* 1U: shifting a signed 1 into bit 31 is undefined behaviour in C */
+    return a < 0 ? (f_val | (1U << 31)) : f_val;
+}
+
+/*
+ * Down-convert the elements of ws and wt to the next narrower float format
+ * and pack them into wd: for DF_WORD each 32-bit element becomes a 16-bit
+ * value (ws into the HL halves, wt into the HR halves); for DF_DOUBLE each
+ * 64-bit element becomes a 32-bit value (ws into WL, wt into WR).
+ *
+ * The result is assembled in a local temporary and copied to wd after
+ * check_msacsr_cause() has run.
+ */
+void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t wx, *pwx = &wx;
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+
+    /*
+     * Start from a clean cause field, as helper_msa_ftq_df and the
+     * compare_* helpers do; otherwise check_msacsr_cause() below could
+     * act on bits left over from an earlier instruction.
+     */
+    clear_msacsr_cause(env);
+
+    switch (df) {
+    case DF_WORD:
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            /* Half precision floats come in two formats: standard
+               IEEE and "ARM" format.  The latter gains extra exponent
+               range by omitting the NaN/Inf encodings.  */
+            flag ieee = 1;
+
+            MSA_FLOAT_BINOP(HL(pwx, i), from_float32, W(pws, i), ieee, 16);
+            MSA_FLOAT_BINOP(HR(pwx, i), from_float32, W(pwt, i), ieee, 16);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(WL(pwx, i), from_float64, D(pws, i), 32);
+            MSA_FLOAT_UNOP(WR(pwx, i), from_float64, D(pwt, i), 32);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pwx);
+}
+
+/*
+ * Convert a single-precision value to a 16-bit fixed-point value by scaling
+ * it by 2^15 and converting to integer, saturating to [q_min, q_max].
+ *
+ * - NaN input: raises float_flag_invalid and returns 0.
+ * - The underflow flag is removed after the scaling and again after the
+ *   integer conversion.
+ * - Overflow during scaling: raises inexact and returns the saturated value.
+ * - Invalid from the integer conversion is replaced by overflow | inexact.
+ * - Out-of-range integer results raise overflow | inexact and saturate.
+ */
+static inline int16_t float32_to_q16(float32 a STATUS_PARAM)
+{
+    int32 q_val;
+    int32 q_min = 0xffff8000;
+    int32 q_max = 0x00007fff;
+
+    int ieee_ex;
+
+    if (float32_is_any_nan(a)) {
+        float_raise(float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+
+    /* scaling */
+    a = float32_scalbn(a, 15 STATUS_VAR);
+
+    ieee_ex = get_float_exception_flags(status);
+    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
+                              STATUS_VAR);
+
+    if (ieee_ex & float_flag_overflow) {
+        float_raise(float_flag_inexact STATUS_VAR);
+        /* NOTE(review): presumably tests the sign bit of the raw encoding;
+           assumes float32 is an integer typedef -- confirm */
+        return (int32)a < 0 ? q_min : q_max;
+    }
+
+    /* conversion to int */
+    q_val = float32_to_int32(a STATUS_VAR);
+
+    ieee_ex = get_float_exception_flags(status);
+    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
+                              STATUS_VAR);
+
+    if (ieee_ex & float_flag_invalid) {
+        /* report an unrepresentable integer result as overflow, not invalid */
+        set_float_exception_flags(ieee_ex & (~float_flag_invalid)
+                                STATUS_VAR);
+        float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
+        return (int32)a < 0 ? q_min : q_max;
+    }
+
+    /* the conversion succeeded but the value may not fit in 16 bits */
+    if (q_val < q_min) {
+        float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
+        return (int16_t)q_min;
+    }
+
+    if (q_max < q_val) {
+        float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
+        return (int16_t)q_max;
+    }
+
+    return (int16_t)q_val;
+}
+
+/*
+ * Convert a double-precision value to a 32-bit fixed-point value by scaling
+ * it by 2^31 and converting to integer, saturating to [q_min, q_max].
+ *
+ * - NaN input: raises float_flag_invalid and returns 0.
+ * - The underflow flag is removed after the scaling and again after the
+ *   integer conversion.
+ * - Overflow during scaling: raises inexact and returns the saturated value.
+ * - Invalid from the integer conversion is replaced by overflow | inexact.
+ * - Out-of-range integer results raise overflow | inexact and saturate.
+ */
+static inline int32 float64_to_q32(float64 a STATUS_PARAM)
+{
+    int64 q_val;
+    int64 q_min = 0xffffffff80000000LL;
+    int64 q_max = 0x000000007fffffffLL;
+
+    int ieee_ex;
+
+    if (float64_is_any_nan(a)) {
+        float_raise(float_flag_invalid STATUS_VAR);
+        return 0;
+    }
+
+    /* scaling */
+    a = float64_scalbn(a, 31 STATUS_VAR);
+
+    ieee_ex = get_float_exception_flags(status);
+    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
+            STATUS_VAR);
+
+    if (ieee_ex & float_flag_overflow) {
+        float_raise(float_flag_inexact STATUS_VAR);
+        /* NOTE(review): presumably tests the sign bit of the raw encoding;
+           assumes float64 is an integer typedef -- confirm */
+        return (int64)a < 0 ? q_min : q_max;
+    }
+
+    /* conversion to integer */
+    q_val = float64_to_int64(a STATUS_VAR);
+
+    ieee_ex = get_float_exception_flags(status);
+    set_float_exception_flags(ieee_ex & (~float_flag_underflow)
+            STATUS_VAR);
+
+    if (ieee_ex & float_flag_invalid) {
+        /* report an unrepresentable integer result as overflow, not invalid */
+        set_float_exception_flags(ieee_ex & (~float_flag_invalid)
+                STATUS_VAR);
+        float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
+        return (int64)a < 0 ? q_min : q_max;
+    }
+
+    /* the conversion succeeded but the value may not fit in 32 bits */
+    if (q_val < q_min) {
+        float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
+        return (int32)q_min;
+    }
+
+    if (q_max < q_val) {
+        float_raise(float_flag_overflow | float_flag_inexact STATUS_VAR);
+        return (int32)q_max;
+    }
+
+    return (int32)q_val;
+}
+
+/*
+ * Convert the floating-point elements of ws and wt to fixed point and pack
+ * them into wd: for DF_WORD via to_q16 (ws into the HL halves, wt into the
+ * HR halves), for DF_DOUBLE via to_q32 (ws into WL, wt into WR).  The
+ * result is assembled in a local temporary and copied to wd after
+ * check_msacsr_cause() has run.
+ */
+void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws, uint32_t wt)
+{
+    wr_t result;
+    wr_t *pres = &result;
+    void *pdst = &(env->active_fpu.fpr[wd]);
+    void *psrc = &(env->active_fpu.fpr[ws]);
+    void *ptgt = &(env->active_fpu.fpr[wt]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP_XD(HL(pres, i), to_q16, W(psrc, i), 32, 16);
+            MSA_FLOAT_UNOP_XD(HR(pres, i), to_q16, W(ptgt, i), 32, 16);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP_XD(WL(pres, i), to_q32, D(psrc, i), 64, 32);
+            MSA_FLOAT_UNOP_XD(WR(pres, i), to_q32, D(ptgt, i), 64, 32);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and double formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pdst, pres);
+}
+
 target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
 {
     switch (cs) {
@@ -2828,12 +4400,10 @@ void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
         break;
     case MSACSR_REGISTER:
         env->active_msa.msacsr = (int32_t)elm & MSACSR_BITS;
-
         /* set float_status rounding mode */
         set_float_rounding_mode(
             ieee_rm[(env->active_msa.msacsr & MSACSR_RM_MASK) >> MSACSR_RM_POS],
             &env->active_msa.fp_status);
-
         /* set float_status flush modes */
         set_flush_to_zero(
           (env->active_msa.msacsr & MSACSR_FS_BIT) != 0 ? 1 : 0,
@@ -2841,7 +4411,6 @@ void helper_msa_ctcmsa(CPUMIPSState *env, target_ulong elm, uint32_t cd)
         set_flush_inputs_to_zero(
           (env->active_msa.msacsr & MSACSR_FS_BIT) != 0 ? 1 : 0,
           &env->active_msa.fp_status);
-
         /* check exception */
         if ((GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED)
             & GET_FP_CAUSE(env->active_msa.msacsr)) {
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 6c8caa4..39caa6d 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -15452,6 +15452,204 @@ static void gen_msa_elm(CPUMIPSState *env, DisasContext *ctx)
     }
 }
 
+/*
+ * Translate an MSA 3RF-format instruction: decode the df bit and the
+ * wt/ws/wd register numbers from ctx->opcode, then emit a call to the
+ * matching helper.  Every recognised opcode first calls
+ * check_msa_access(); unrecognised encodings raise EXCP_RI.
+ */
+static void gen_msa_3rf(CPUMIPSState *env, DisasContext *ctx)
+{
+#define MASK_MSA_3RF(op)    (MASK_MSA_MINOR(op) | ((op) & (0xf << 22)))
+    uint32_t opcode = ctx->opcode;
+
+    /*
+     * df [21:21], biased for the two helper families: df + 2 is passed to
+     * the floating-point helpers and df + 1 to the *_q fixed-point helpers
+     * (presumably selecting word/double and half/word formats -- confirm
+     * against the DF_* definitions).
+     */
+    uint8_t df = (opcode >> 21) & 0x1;
+    uint8_t df2 = df + 2;
+    uint8_t df1 = df + 1;
+    uint8_t wt = (opcode >> 16) & 0x1f;     /* wt [20:16] */
+    uint8_t ws = (opcode >> 11) & 0x1f;     /* ws [15:11] */
+    uint8_t wd = (opcode >> 6) & 0x1f;      /* wd [10:6] */
+
+    TCGv_i32 twd = tcg_const_i32(wd);
+    TCGv_i32 tws = tcg_const_i32(ws);
+    TCGv_i32 twt = tcg_const_i32(wt);
+    TCGv_i32 tdf2 = tcg_const_i32(df2);
+    TCGv_i32 tdf1 = tcg_const_i32(df1);
+
+    switch (MASK_MSA_3RF(opcode)) {
+    case OPC_MSA_FCAF_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fcaf_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FADD_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fadd_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FCUN_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fcun_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FSUB_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fsub_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FCOR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fcor_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FCEQ_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fceq_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FMUL_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fmul_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FCUNE_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fcune_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FCUEQ_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fcueq_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FDIV_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fdiv_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FCNE_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fcne_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FCLT_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fclt_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FMADD_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fmadd_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_MUL_Q_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_mul_q_df(cpu_env, tdf1, twd, tws, twt);
+        break;
+    case OPC_MSA_FCULT_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fcult_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FMSUB_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fmsub_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_MADD_Q_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_madd_q_df(cpu_env, tdf1, twd, tws, twt);
+        break;
+    case OPC_MSA_FCLE_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fcle_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_MSUB_Q_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_msub_q_df(cpu_env, tdf1, twd, tws, twt);
+        break;
+    case OPC_MSA_FCULE_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fcule_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FEXP2_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fexp2_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FSAF_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fsaf_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FEXDO_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fexdo_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FSUN_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fsun_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FSOR_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fsor_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FSEQ_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fseq_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FTQ_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_ftq_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FSUNE_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fsune_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FSUEQ_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fsueq_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FSNE_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fsne_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FSLT_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fslt_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FMIN_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fmin_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_MULR_Q_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_mulr_q_df(cpu_env, tdf1, twd, tws, twt);
+        break;
+    case OPC_MSA_FSULT_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fsult_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FMIN_A_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fmin_a_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_MADDR_Q_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_maddr_q_df(cpu_env, tdf1, twd, tws, twt);
+        break;
+    case OPC_MSA_FSLE_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fsle_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FMAX_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fmax_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_MSUBR_Q_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_msubr_q_df(cpu_env, tdf1, twd, tws, twt);
+        break;
+    case OPC_MSA_FSULE_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fsule_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    case OPC_MSA_FMAX_A_df:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_fmax_a_df(cpu_env, tdf2, twd, tws, twt);
+        break;
+    default:
+        MIPS_INVAL("MSA instruction");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+
+    /* release the constant temporaries on every path */
+    tcg_temp_free_i32(twd);
+    tcg_temp_free_i32(tws);
+    tcg_temp_free_i32(twt);
+    tcg_temp_free_i32(tdf2);
+    tcg_temp_free_i32(tdf1);
+}
+
 static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
 {
     uint32_t opcode = ctx->opcode;
@@ -15485,6 +15683,11 @@ static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
     case OPC_MSA_ELM:
         gen_msa_elm(env, ctx);
         break;
+    case OPC_MSA_3RF_1A:
+    case OPC_MSA_3RF_1B:
+    case OPC_MSA_3RF_1C:
+        gen_msa_3rf(env, ctx);
+        break;
     default:
         MIPS_INVAL("MSA instruction");
         generate_exception(ctx, EXCP_RI);
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 16/20] target-mips: add MSA VEC/2R format instructions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (14 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 15/20] target-mips: add MSA 3RF " Yongbok Kim
@ 2014-07-14  9:55 ` Yongbok Kim
  2014-07-14  9:56 ` [Qemu-devel] [PATCH 17/20] target-mips: add MSA 2RF " Yongbok Kim
                   ` (3 subsequent siblings)
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:55 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA VEC/2R format instructions

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.h     |   11 ++
 target-mips/msa_helper.c |  244 ++++++++++++++++++++++++++++++++++++++++++++++
 target-mips/translate.c  |   98 ++++++++++++++++++
 3 files changed, 353 insertions(+), 0 deletions(-)

diff --git a/target-mips/helper.h b/target-mips/helper.h
index fec21b6..b87bb50 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -698,6 +698,7 @@ DEF_HELPER_5(msa_adds_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_adds_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_addv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_addvi_df, void, env, i32, i32, i32, s64)
+DEF_HELPER_4(msa_and_v, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_andi_b, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_asub_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_asub_u_df, void, env, i32, i32, i32, i32)
@@ -711,10 +712,13 @@ DEF_HELPER_5(msa_binsl_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_binsli_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_binsr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_binsri_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_bmnz_v, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_bmnzi_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_bmz_v, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_bmzi_b, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_bneg_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_bnegi_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_bsel_v, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_bseli_b, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_bset_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_bseti_df, void, env, i32, i32, i32, i32)
@@ -755,6 +759,7 @@ DEF_HELPER_5(msa_fcune_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fdiv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fexdo_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fexp2_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_fill_df, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_fmadd_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fmax_a_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fmax_df, void, env, i32, i32, i32, i32)
@@ -808,10 +813,15 @@ DEF_HELPER_5(msa_msubv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_mul_q_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_mulr_q_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_mulv_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_nloc_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_nlzc_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_nor_v, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_nori_b, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_or_v, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_ori_b, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_pckev_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_pckod_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_pcnt_df, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_sat_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_sat_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_shf_df, void, env, i32, i32, i32, i32)
@@ -836,4 +846,5 @@ DEF_HELPER_5(msa_subsuu_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_subv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_subvi_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_vshf_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_xor_v, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_xori_b, void, env, i32, i32, i32)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index bb4ab66..aa165ac 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -495,6 +495,22 @@ void helper_msa_subsus_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+void helper_msa_and_v(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    /* AND.V: wd = ws & wt, computed on each D element of the register. */
+    ALL_D_ELEMENTS(i, MSA_WRLEN) {
+        D(pwd, i) = D(pws, i) & D(pwt, i);
+    } DONE_ALL_ELEMENTS;
+    /* 1U: wd can be 31, and 1 << 31 is undefined for a signed int. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 void helper_msa_andi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
@@ -508,6 +524,22 @@ void helper_msa_andi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
     }
 }
 
+void helper_msa_or_v(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    /* OR.V: wd = ws | wt, computed on each D element of the register. */
+    ALL_D_ELEMENTS(i, MSA_WRLEN) {
+        D(pwd, i) = D(pws, i) | D(pwt, i);
+    } DONE_ALL_ELEMENTS;
+    /* 1U: wd can be 31, and 1 << 31 is undefined for a signed int. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 void helper_msa_ori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
@@ -521,6 +553,22 @@ void helper_msa_ori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
     }
 }
 
+void helper_msa_nor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    /* NOR.V: wd = ~(ws | wt), computed on each D element of the register. */
+    ALL_D_ELEMENTS(i, MSA_WRLEN) {
+        D(pwd, i) = ~(D(pws, i) | D(pwt, i));
+    } DONE_ALL_ELEMENTS;
+    /* 1U: wd can be 31, and 1 << 31 is undefined for a signed int. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 void helper_msa_nori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
@@ -534,6 +582,22 @@ void helper_msa_nori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
     }
 }
 
+void helper_msa_xor_v(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    /* XOR.V: wd = ws ^ wt, computed on each D element of the register. */
+    ALL_D_ELEMENTS(i, MSA_WRLEN) {
+        D(pwd, i) = D(pws, i) ^ D(pwt, i);
+    } DONE_ALL_ELEMENTS;
+    /* 1U: wd can be 31, and 1 << 31 is undefined for a signed int. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 void helper_msa_xori_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
@@ -922,6 +986,22 @@ void helper_msa_binsri_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
 #define BIT_MOVE_IF_NOT_ZERO(dest, arg1, arg2, df) \
             dest = UNSIGNED(((dest & (~arg2)) | (arg1 & arg2)), df)
 
+void helper_msa_bmnz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    /* BMNZ.V: where a wt bit is set, the matching ws bit is copied to wd. */
+    ALL_D_ELEMENTS(i, MSA_WRLEN) {
+        BIT_MOVE_IF_NOT_ZERO(D(pwd, i), D(pws, i), D(pwt, i), DF_DOUBLE);
+    } DONE_ALL_ELEMENTS;
+    /* 1U: wd can be 31, and 1 << 31 is undefined for a signed int. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 void helper_msa_bmnzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
@@ -938,6 +1018,22 @@ void helper_msa_bmnzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
 #define BIT_MOVE_IF_ZERO(dest, arg1, arg2, df) \
             dest = UNSIGNED((dest & arg2) | (arg1 & (~arg2)), df)
 
+void helper_msa_bmz_v(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    /* BMZ.V: where a wt bit is clear, the matching ws bit is copied to wd. */
+    ALL_D_ELEMENTS(i, MSA_WRLEN) {
+        BIT_MOVE_IF_ZERO(D(pwd, i), D(pws, i), D(pwt, i), DF_DOUBLE);
+    } DONE_ALL_ELEMENTS;
+    /* 1U: wd can be 31, and 1 << 31 is undefined for a signed int. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 void helper_msa_bmzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
@@ -954,6 +1050,22 @@ void helper_msa_bmzi_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
 #define BIT_SELECT(dest, arg1, arg2, df) \
             dest = UNSIGNED((arg1 & (~dest)) | (arg2 & dest), df)
 
+void helper_msa_bsel_v(CPUMIPSState *env, uint32_t wd, uint32_t ws,
+        uint32_t wt)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+    void *pwt = &(env->active_fpu.fpr[wt]);
+    /* BSEL.V: old wd is the mask; clear bits take ws, set bits take wt. */
+    ALL_D_ELEMENTS(i, MSA_WRLEN) {
+        BIT_SELECT(D(pwd, i), D(pws, i), D(pwt, i), DF_DOUBLE);
+    } DONE_ALL_ELEMENTS;
+    /* 1U: wd can be 31, and 1 << 31 is undefined for a signed int. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 void helper_msa_bseli_b(CPUMIPSState *env, uint32_t wd, uint32_t ws,
         uint32_t i8)
 {
@@ -2251,6 +2363,42 @@ void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+void helper_msa_fill_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t rs)
+{
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    /* FILL.df: every element of wd receives GPR rs cast to the element type. */
+    switch (df) {
+    case DF_BYTE:
+        ALL_B_ELEMENTS(i, MSA_WRLEN) {
+            B(pwd, i) = (int8_t)env->active_tc.gpr[rs];
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_HALF:
+        ALL_H_ELEMENTS(i, MSA_WRLEN) {
+            H(pwd, i) = (int16_t)env->active_tc.gpr[rs];
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_WORD:
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            W(pwd, i) = (int32_t)env->active_tc.gpr[rs];
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            D(pwd, i) = (int64_t)env->active_tc.gpr[rs];
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+    /* 1U: wd can be 31, and 1 << 31 is undefined for a signed int. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 static inline void msa_insert_df(CPUMIPSState *env, uint32_t df, void *pwd,
         target_ulong rs, uint32_t n)
 {
@@ -2437,6 +2585,102 @@ void helper_msa_mod_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+static inline int64_t msa_nlzc_df(CPUMIPSState *env, uint32_t df, int64_t arg)
+{
+    /* Leading-zero count found by repeatedly halving the search window. */
+    uint64_t rest = UNSIGNED(arg, df);
+    uint64_t high;
+    int zeros = DF_BITS(df);
+    int step = DF_BITS(df) / 2;
+
+    do {
+        high = rest >> step;
+        if (high != 0) {
+            zeros -= step;
+            rest = high;
+        }
+        step >>= 1;
+    } while (step != 0);
+
+    /* Presumably rest is 0 or 1 here (UNSIGNED() limiting arg to df bits). */
+    return zeros - rest;
+}
+
+void helper_msa_nlzc_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    /* NLZC.df: each wd element = msa_nlzc_df() of the matching ws element. */
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_nlzc_df(env, df, ts);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    /* 1U: wd can be 31, and 1 << 31 is undefined for a signed int. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+static inline int64_t msa_nloc_df(CPUMIPSState *env, uint32_t df, int64_t arg)
+{
+    return msa_nlzc_df(env, df, UNSIGNED((~arg), df));
+}
+
+void helper_msa_nloc_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    /* NLOC.df: per element, count leading zeros of the complemented value. */
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_nloc_df(env, df, ts);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    /* 1U: wd can be 31, and 1 << 31 is undefined for a signed int. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+static inline int64_t msa_pcnt_df(CPUMIPSState *env, uint32_t df, int64_t arg)
+{
+    /* Bit count: add neighbouring fields, doubling the field width each step. */
+    uint64_t v = UNSIGNED(arg, df);
+
+    v = ((v >>  1) & 0x5555555555555555ULL) + (v & 0x5555555555555555ULL);
+    v = ((v >>  2) & 0x3333333333333333ULL) + (v & 0x3333333333333333ULL);
+    v = ((v >>  4) & 0x0F0F0F0F0F0F0F0FULL) + (v & 0x0F0F0F0F0F0F0F0FULL);
+    v = ((v >>  8) & 0x00FF00FF00FF00FFULL) + (v & 0x00FF00FF00FF00FFULL);
+    v = ((v >> 16) & 0x0000FFFF0000FFFFULL) + (v & 0x0000FFFF0000FFFFULL);
+    v = (v >> 32) + (v & 0x00000000FFFFFFFFULL);
+
+    /* After the last fold v holds the sum of all the original bits. */
+    return v;
+}
+
+void helper_msa_pcnt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+    /* PCNT.df: each wd element = msa_pcnt_df() of the matching ws element. */
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_pcnt_df(env, df, ts);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+    /* 1U: wd can be 31, and 1 << 31 is undefined for a signed int. */
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 static inline int64_t msa_sat_u_df(CPUMIPSState *env, uint32_t df, int64_t arg,
         uint32_t m)
 {
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 39caa6d..4185019 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -15650,6 +15650,101 @@ static void gen_msa_3rf(CPUMIPSState *env, DisasContext *ctx)
     tcg_temp_free_i32(tdf1);
 }
 
+static void gen_msa_vec(CPUMIPSState *env, DisasContext *ctx)
+{
+#define MASK_MSA_VEC(op)    (MASK_MSA_MINOR(op) | (op & (0x1f << 21)))
+#define MASK_MSA_2R(op)     (MASK_MSA_MINOR(op) | (op & (0x1f << 21)) | \
+                            (op & (0x7 << 18)))
+#define MASK_MSA_2RF(op)    (MASK_MSA_MINOR(op) | (op & (0x1f << 21)) | \
+                            (op & (0xf << 17)))
+    /* Decode VEC and 2R format opcodes; anything unrecognised raises RI. */
+    uint32_t opcode = ctx->opcode;
+    uint8_t wt = (ctx->opcode >> 16) & 0x1f /* wt [20:16] */;
+    uint8_t ws = (ctx->opcode >> 11) & 0x1f /* ws [15:11] */;
+    uint8_t wd = (ctx->opcode >> 6) & 0x1f /* wd [10:6] */;
+
+    TCGv_i32 twd = tcg_const_i32(wd);
+    TCGv_i32 tws = tcg_const_i32(ws);
+    TCGv_i32 twt = tcg_const_i32(wt);
+
+    switch (MASK_MSA_VEC(opcode)) {
+    case OPC_MSA_AND_V:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_and_v(cpu_env, twd, tws, twt);
+        break;
+    case OPC_MSA_OR_V:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_or_v(cpu_env, twd, tws, twt);
+        break;
+    case OPC_MSA_NOR_V:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_nor_v(cpu_env, twd, tws, twt);
+        break;
+    case OPC_MSA_XOR_V:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_xor_v(cpu_env, twd, tws, twt);
+        break;
+    case OPC_MSA_BMNZ_V:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_bmnz_v(cpu_env, twd, tws, twt);
+        break;
+    case OPC_MSA_BMZ_V:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_bmz_v(cpu_env, twd, tws, twt);
+        break;
+    case OPC_MSA_BSEL_V:
+        check_msa_access(env, ctx, wt, ws, wd);
+        gen_helper_msa_bsel_v(cpu_env, twd, tws, twt);
+        break;
+    case OPC_MSA_2R:
+        {
+            uint8_t df = (ctx->opcode >> 16) & 0x3 /* df [17:16] */;
+            TCGv_i32 tdf = tcg_const_i32(df);
+
+            switch (MASK_MSA_2R(opcode)) {
+            case OPC_MSA_FILL_df:
+#if !defined(TARGET_MIPS64)
+                /* Double format valid only for MIPS64 */
+                if (df == 3) {
+                    if (check_msa_access(env, ctx, -1, -1, -1)) {
+                        generate_exception(ctx, EXCP_RI);
+                    }
+                    break;
+                }
+#endif
+                check_msa_access(env, ctx, -1, -1, wd);
+                gen_helper_msa_fill_df(cpu_env, tdf, twd, tws); /* trs */
+                break;
+            case OPC_MSA_PCNT_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_pcnt_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_NLOC_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_nloc_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_NLZC_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_nlzc_df(cpu_env, tdf, twd, tws);
+                break;
+            default:
+                MIPS_INVAL("MSA instruction");
+                generate_exception(ctx, EXCP_RI);
+                break;
+            }
+            tcg_temp_free_i32(tdf);
+        }
+        break;
+    case OPC_MSA_2RF: /* not decoded in this function */
+    default:
+        MIPS_INVAL("MSA instruction");
+        generate_exception(ctx, EXCP_RI);
+        break;
+    }
+    tcg_temp_free_i32(twd);
+    tcg_temp_free_i32(tws);
+    tcg_temp_free_i32(twt);
+}
 static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
 {
     uint32_t opcode = ctx->opcode;
@@ -15688,6 +15783,9 @@ static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
     case OPC_MSA_3RF_1C:
         gen_msa_3rf(env, ctx);
         break;
+    case OPC_MSA_VEC:
+        gen_msa_vec(env, ctx);
+        break;
     default:
         MIPS_INVAL("MSA instruction");
         generate_exception(ctx, EXCP_RI);
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 17/20] target-mips: add MSA 2RF format instructions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (15 preceding siblings ...)
  2014-07-14  9:55 ` [Qemu-devel] [PATCH 16/20] target-mips: add MSA VEC/2R " Yongbok Kim
@ 2014-07-14  9:56 ` Yongbok Kim
  2014-07-14  9:56 ` [Qemu-devel] [PATCH 18/20] target-mips: add MSA MI10 " Yongbok Kim
                   ` (2 subsequent siblings)
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:56 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA 2RF format instructions

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.h     |   16 ++
 target-mips/msa_helper.c |  656 ++++++++++++++++++++++++++++++++++++++++++++++
 target-mips/translate.c  |   76 ++++++
 3 files changed, 748 insertions(+), 0 deletions(-)

diff --git a/target-mips/helper.h b/target-mips/helper.h
index b87bb50..c86bd36 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -747,6 +747,7 @@ DEF_HELPER_5(msa_dpsub_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fadd_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fcaf_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fceq_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_fclass_df, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_fcle_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fclt_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fcne_df, void, env, i32, i32, i32, i32)
@@ -759,7 +760,14 @@ DEF_HELPER_5(msa_fcune_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fdiv_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fexdo_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fexp2_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_fexupl_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_fexupr_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_ffint_s_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_ffint_u_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_ffql_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_ffqr_df, void, env, i32, i32, i32)
 DEF_HELPER_4(msa_fill_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_flog2_df, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_fmadd_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fmax_a_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fmax_df, void, env, i32, i32, i32, i32)
@@ -767,19 +775,27 @@ DEF_HELPER_5(msa_fmin_a_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fmin_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fmsub_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fmul_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_frcp_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_frint_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_frsqrt_df, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_fsaf_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fseq_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fsle_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fslt_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fsne_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fsor_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_fsqrt_df, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_fsub_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fsueq_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fsule_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fsult_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fsun_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_fsune_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_ftint_s_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_ftint_u_df, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_ftq_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_4(msa_ftrunc_s_df, void, env, i32, i32, i32)
+DEF_HELPER_4(msa_ftrunc_u_df, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_hadd_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_hadd_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_hsub_s_df, void, env, i32, i32, i32, i32)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index aa165ac..d152953 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -3240,6 +3240,9 @@ void helper_msa_msubr_q_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+#define FLOAT_ONE32 make_float32(0x3f8 << 20) /* 0x3f800000: 1.0 in binary32 */
+#define FLOAT_ONE64 make_float64(0x3ffULL << 52) /* 1.0 in binary64 */
+
 #define FLOAT_SNAN16 (float16_default_nan ^ 0x0220)
         /* 0x7c20 */
 #define FLOAT_SNAN32 (float32_default_nan ^ 0x00400020)
@@ -3352,6 +3355,28 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
     (!float ## BITS ## _is_zero(ARG)                \
     && float ## BITS ## _is_zero_or_denormal(ARG))
 
+#define MSA_FLOAT_UNOP0(DEST, OP, ARG, BITS)                                \
+    do {                                                                    \
+        int c;      /* value returned by update_msacsr() */                 \
+        int cause;  /* bits of c that are enabled */                        \
+        int enable; /* enable bits from MSACSR, plus UNIMPLEMENTED */       \
+        /* DEST = float<BITS>_<OP>(ARG) with exception flags cleared */     \
+        set_float_exception_flags(0, &env->active_msa.fp_status);           \
+        DEST = float ## BITS ## _ ## OP(ARG, &env->active_msa.fp_status);   \
+        c = update_msacsr(env, CLEAR_FS_UNDERFLOW, 0);                      \
+        enable = GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED;  \
+        cause = c & enable;                                                 \
+                                                                            \
+        if (cause) {  /* SNaN pattern, low 6 bits cleared, then | c */      \
+            DEST = ((FLOAT_SNAN ## BITS >> 6) << 6) | c;                    \
+        }                                                                   \
+        else {                                                              \
+            if (float ## BITS ## _is_any_nan(ARG)) {                        \
+                DEST = 0;  /* NaN input with no enabled cause */            \
+            }                                                               \
+        }                                                                   \
+    } while (0)
+
 #define MSA_FLOAT_UNOP_XD(DEST, OP, ARG, BITS, XBITS)                       \
     do {                                                                    \
         int c;                                                              \
@@ -3386,6 +3411,37 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
         }                                                                   \
     } while (0)
 
+#define MSA_FLOAT_LOGB(DEST, ARG, BITS)                                     \
+    do {                                                                    \
+        int c;      /* value returned by update_msacsr() */                 \
+        int cause;  /* bits of c that are enabled */                        \
+        int enable; /* enable bits from MSACSR, plus UNIMPLEMENTED */       \
+        /* DEST = log2(ARG) rounded to an integer using round-down */       \
+        set_float_exception_flags(0, &env->active_msa.fp_status);           \
+        set_float_rounding_mode(float_round_down,                           \
+                                &env->active_msa.fp_status);                \
+        DEST = float ## BITS ## _ ## log2(ARG,                              \
+                                          &env->active_msa.fp_status);      \
+        DEST = float ## BITS ## _ ## round_to_int(DEST,                     \
+                                               &env->active_msa.fp_status); \
+        set_float_rounding_mode(ieee_rm[(env->active_msa.msacsr &           \
+                                         MSACSR_RM_MASK) >> MSACSR_RM_POS], \
+                                &env->active_msa.fp_status);                \
+        /* Drop float_flag_inexact from the accumulated flags. */           \
+        set_float_exception_flags(                                          \
+            get_float_exception_flags(&env->active_msa.fp_status)           \
+                                                & (~float_flag_inexact),    \
+            &env->active_msa.fp_status);                                    \
+        /* A denormal DEST is reported to update_msacsr(). */               \
+        c = update_msacsr(env, 0, IS_DENORMAL(DEST, BITS));                 \
+        enable = GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED;  \
+        cause = c & enable;                                                 \
+        /* An enabled cause replaces DEST with the SNaN pattern | c. */     \
+        if (cause) {                                                        \
+            DEST = ((FLOAT_SNAN ## BITS >> 6) << 6) | c;                    \
+        }                                                                   \
+    } while (0)
+
 #define MSA_FLOAT_BINOP(DEST, OP, ARG1, ARG2, BITS)                         \
     do {                                                                    \
         int c;                                                              \
@@ -3422,6 +3478,27 @@ static inline int update_msacsr(CPUMIPSState *env, int action, int denormal)
         }                                                                   \
     } while (0)
 
+#define MSA_FLOAT_RECIPROCAL(DEST, ARG, BITS)                               \
+    do {                                                                    \
+        int c;      /* value returned by update_msacsr() */                 \
+        int cause;  /* bits of c that are enabled */                        \
+        int enable; /* enable bits from MSACSR, plus UNIMPLEMENTED */       \
+        /* DEST = FLOAT_ONE<BITS> / ARG, with exception flags cleared */    \
+        set_float_exception_flags(0, &env->active_msa.fp_status);           \
+        DEST = float ## BITS ## _ ## div(FLOAT_ONE ## BITS, ARG,            \
+                                         &env->active_msa.fp_status);       \
+        c = update_msacsr(env, float ## BITS ## _is_infinity(ARG) ||        \
+                          float ## BITS ## _is_quiet_nan(DEST) ?            \
+                          0 : RECIPROCAL_INEXACT,                           \
+                          IS_DENORMAL(DEST, BITS));                         \
+        enable = GET_FP_ENABLE(env->active_msa.msacsr) | FP_UNIMPLEMENTED;  \
+        cause = c & enable;                                                 \
+        /* The action above is 0 for infinite ARG or quiet-NaN DEST. */     \
+        if (cause) {                                                        \
+            DEST = ((FLOAT_SNAN ## BITS >> 6) << 6) | c;                    \
+        }                                                                   \
+    } while (0)
+
 #define MSA_FLOAT_MULADD(DEST, ARG1, ARG2, ARG3, NEGATE, BITS)              \
     do {                                                                    \
         int c;                                                              \
@@ -3566,6 +3643,36 @@ void helper_msa_fdiv_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     msa_move_v(pwd, pwx);
 }
 
+void helper_msa_fsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    /* wx is a scratch register; element results are staged there. */
+    wr_t wx, *pwx = &wx;
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+
+    clear_msacsr_cause(env);
+
+    /* Only the word and double data formats are handled. */
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(W(pwx, i), sqrt, W(pws, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(D(pwx, i), sqrt, D(pws, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    /* Hand the staged result to msa_move_v() for wd. */
+    msa_move_v(pwd, pwx);
+}
+
 void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, uint32_t wt)
 {
@@ -3603,6 +3710,36 @@ void helper_msa_fexp2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     msa_move_v(pwd, pwx);
 }
 
+void helper_msa_flog2_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    /* wx is a scratch register; element results are staged there. */
+    wr_t wx, *pwx = &wx;
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+
+    clear_msacsr_cause(env);
+
+    /* Only the word and double data formats are handled. */
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_LOGB(W(pwx, i), W(pws, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_LOGB(D(pwx, i), D(pws, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    /* Hand the staged result to msa_move_v() for wd. */
+    msa_move_v(pwd, pwx);
+}
+
 void helper_msa_fmadd_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, uint32_t wt)
 {
@@ -4417,6 +4554,89 @@ void helper_msa_fsor_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     compare_or(env, pwd, pws, pwt, df, 0);
 }
 
+#define MSA_FLOAT_CLASS_SIGNALING_NAN      0x001
+#define MSA_FLOAT_CLASS_QUIET_NAN          0x002
+
+#define MSA_FLOAT_CLASS_NEGATIVE_INFINITY  0x004
+#define MSA_FLOAT_CLASS_NEGATIVE_NORMAL    0x008
+#define MSA_FLOAT_CLASS_NEGATIVE_SUBNORMAL 0x010
+#define MSA_FLOAT_CLASS_NEGATIVE_ZERO      0x020
+
+#define MSA_FLOAT_CLASS_POSITIVE_INFINITY  0x040
+#define MSA_FLOAT_CLASS_POSITIVE_NORMAL    0x080
+#define MSA_FLOAT_CLASS_POSITIVE_SUBNORMAL 0x100
+#define MSA_FLOAT_CLASS_POSITIVE_ZERO      0x200
+
+/*
+ * Classify the floating-point bit pattern ARG of width BITS and return,
+ * from the enclosing function, the single MSA_FLOAT_CLASS_* bit that
+ * describes it.  NaNs are tested first (signaling before quiet); every
+ * other value is sorted into infinity, zero, subnormal or normal, with
+ * the sign choosing between the NEGATIVE_ and POSITIVE_ variants.
+ * Note that every path through the expansion ends in a "return".
+ */
+#define MSA_FLOAT_CLASS(ARG, BITS)                                      \
+    do {                                                                \
+        int is_snan = float ## BITS ## _is_signaling_nan(ARG);          \
+        int is_qnan = float ## BITS ## _is_quiet_nan(ARG);              \
+        int is_inf  = float ## BITS ## _is_infinity(ARG);               \
+        int is_neg  = float ## BITS ## _is_neg(ARG);                    \
+        int is_zero = float ## BITS ## _is_zero(ARG);                   \
+        int is_dnmz = float ## BITS ## _is_zero_or_denormal(ARG);       \
+                                                                        \
+        if (is_snan) {                                                  \
+            return MSA_FLOAT_CLASS_SIGNALING_NAN;                       \
+        }                                                               \
+        if (is_qnan) {                                                  \
+            return MSA_FLOAT_CLASS_QUIET_NAN;                           \
+        }                                                               \
+        if (is_inf) {                                                   \
+            return is_neg ? MSA_FLOAT_CLASS_NEGATIVE_INFINITY           \
+                          : MSA_FLOAT_CLASS_POSITIVE_INFINITY;          \
+        }                                                               \
+        if (is_zero) {                                                  \
+            return is_neg ? MSA_FLOAT_CLASS_NEGATIVE_ZERO               \
+                          : MSA_FLOAT_CLASS_POSITIVE_ZERO;              \
+        }                                                               \
+        if (is_dnmz) {                                                  \
+            return is_neg ? MSA_FLOAT_CLASS_NEGATIVE_SUBNORMAL          \
+                          : MSA_FLOAT_CLASS_POSITIVE_SUBNORMAL;         \
+        }                                                               \
+        return is_neg ? MSA_FLOAT_CLASS_NEGATIVE_NORMAL                 \
+                      : MSA_FLOAT_CLASS_POSITIVE_NORMAL;                \
+    } while (0)
+
+/*
+ * Return the MSA_FLOAT_CLASS_* bit for one element.  DF_WORD selects the
+ * 32-bit classifier and every other df the 64-bit one.  Both expansions
+ * of MSA_FLOAT_CLASS return from this function, so control never falls
+ * off the end.  env is not used.
+ */
+static inline int64_t msa_fclass_df(CPUMIPSState *env, uint32_t df, int64_t arg)
+{
+    if (df != DF_WORD) {
+        MSA_FLOAT_CLASS(arg, 64);
+    }
+    MSA_FLOAT_CLASS(arg, 32);
+}
+
+/*
+ * FCLASS.df helper: replace every element of vector register ws with the
+ * class mask computed by msa_fclass_df() and store it in wd.
+ *
+ * df is the data-format code (elements are 8 << df bits wide); wd and ws
+ * are vector register numbers.  When MSAIR_WRP_BIT is set in MSAIR, the
+ * bit for wd is also set in msamodify.
+ */
+void helper_msa_fclass_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    uint64_t td, ts;
+    int i;
+    int df_bits = 8 * (1 << df);
+
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        ts = msa_load_wr_elem_s64(env, ws, df, i);
+        td = msa_fclass_df(env, df, ts);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* register numbers go up to 31, so shift an unsigned constant:
+           1 << 31 would overflow a signed int */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 static inline float16 float16_from_float32(int32 a, flag ieee STATUS_PARAM)
 {
       float16 f_val;
@@ -4437,6 +4657,26 @@ static inline float32 float32_from_float64(int64 a STATUS_PARAM)
       return a < 0 ? (f_val | (1 << 31)) : f_val;
 }
 
+/*
+ * Widen a half-precision value (raw bits in a) to single precision.
+ * ieee is passed through to float16_to_float32().  The converted value
+ * goes through float32_maybe_silence_nan(), and the sign bit is then
+ * forced on whenever the raw input has its top bit set (a < 0).
+ */
+static inline float32 float32_from_float16(int16_t a, flag ieee STATUS_PARAM)
+{
+    float32 f_val;
+
+    f_val = float16_to_float32((float16)a, ieee STATUS_VAR);
+    f_val = float32_maybe_silence_nan(f_val);
+
+    /* 1U, not 1: shifting a signed 1 into bit 31 overflows int */
+    return a < 0 ? (f_val | (1U << 31)) : f_val;
+}
+
+/*
+ * Widen a single-precision value (raw bits in a) to double precision.
+ * The converted value goes through float64_maybe_silence_nan(), and the
+ * sign bit is then forced on whenever the raw input has its top bit set
+ * (a < 0).
+ */
+static inline float64 float64_from_float32(int32 a STATUS_PARAM)
+{
+    float64 f_val;
+
+    /* the source operand is a float32, so cast to that type rather than
+       to the float64 result type */
+    f_val = float32_to_float64((float32)a STATUS_VAR);
+    f_val = float64_maybe_silence_nan(f_val);
+
+    return a < 0 ? (f_val | (1ULL << 63)) : f_val;
+}
+
 void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, uint32_t wt)
 {
@@ -4472,6 +4712,308 @@ void helper_msa_fexdo_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     msa_move_v(pwd, pwx);
 }
 
+/*
+ * FEXUPL.df helper: widen floating-point elements of vector register ws
+ * to the next larger format and write them to wd.  For df == DF_WORD the
+ * half-precision elements selected by HL() become single precision; for
+ * df == DF_DOUBLE the single-precision elements selected by WL() become
+ * double precision (presumably HL/WL pick the "left" half of the source
+ * vector -- confirm against their definitions).  Any other df trips the
+ * assert.
+ */
+void helper_msa_fexupl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t wx, *pwx = &wx;
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+
+    /* Start from a clean Cause field, as the other floating-point helpers
+       do, so check_msacsr_cause() below only sees what this instruction
+       raised and not leftovers from an earlier one. */
+    clear_msacsr_cause(env);
+
+    switch (df) {
+    case DF_WORD:
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            /* Half precision floats come in two formats: standard
+               IEEE and "ARM" format.  The latter gains extra exponent
+               range by omitting the NaN/Inf encodings.  */
+            flag ieee = 1;
+
+            MSA_FLOAT_BINOP(W(pwx, i), from_float16, HL(pws, i), ieee, 32);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(D(pwx, i), from_float32, WL(pws, i), 64);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pwx);
+}
+
+/*
+ * FEXUPR.df helper: widen floating-point elements of vector register ws
+ * to the next larger format and write them to wd.  For df == DF_WORD the
+ * half-precision elements selected by HR() become single precision; for
+ * df == DF_DOUBLE the single-precision elements selected by WR() become
+ * double precision (presumably HR/WR pick the "right" half of the source
+ * vector -- confirm against their definitions).  Any other df trips the
+ * assert.
+ */
+void helper_msa_fexupr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t wx, *pwx = &wx;
+    void *pwd = &(env->active_fpu.fpr[wd]);
+    void *pws = &(env->active_fpu.fpr[ws]);
+
+    /* Start from a clean Cause field, as the other floating-point helpers
+       do, so check_msacsr_cause() below only sees what this instruction
+       raised and not leftovers from an earlier one. */
+    clear_msacsr_cause(env);
+
+    switch (df) {
+    case DF_WORD:
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            /* Half precision floats come in two formats: standard
+               IEEE and "ARM" format.  The latter gains extra exponent
+               range by omitting the NaN/Inf encodings.  */
+            flag ieee = 1;
+
+            MSA_FLOAT_BINOP(W(pwx, i), from_float16, HR(pws, i), ieee, 32);
+        } DONE_ALL_ELEMENTS;
+        break;
+    case DF_DOUBLE:
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(D(pwx, i), from_float32, WR(pws, i), 64);
+        } DONE_ALL_ELEMENTS;
+        break;
+    default:
+        /* shouldn't get here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+
+    msa_move_v(pwd, pwx);
+}
+
+#define float32_from_int32 int32_to_float32
+#define float32_from_uint32 uint32_to_float32
+
+#define float64_from_int64 int64_to_float64
+#define float64_from_uint64 uint64_to_float64
+
+/*
+ * FFINT_S.df helper.  Applies MSA_FLOAT_UNOP with the from_int32
+ * (df == DF_WORD) or from_int64 (df == DF_DOUBLE) operation to every
+ * element of vector register ws; the float32_from_int32 and
+ * float64_from_int64 aliases defined just above map these to the
+ * softfloat signed-integer conversions.  Any other df trips the assert.
+ * The result is moved into wd after check_msacsr_cause() has run.
+ */
+void helper_msa_ffint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t tmp;
+    wr_t *res = &tmp;
+    void *src = &(env->active_fpu.fpr[ws]);
+    void *dst = &(env->active_fpu.fpr[wd]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(W(res, i), from_int32, W(src, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(D(res, i), from_int64, D(src, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and doubleword formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(dst, res);
+}
+
+/*
+ * FFINT_U.df helper.  Applies MSA_FLOAT_UNOP with the from_uint32
+ * (df == DF_WORD) or from_uint64 (df == DF_DOUBLE) operation to every
+ * element of vector register ws; the float32_from_uint32 and
+ * float64_from_uint64 aliases defined above map these to the softfloat
+ * unsigned-integer conversions.  Any other df trips the assert.  The
+ * result is moved into wd after check_msacsr_cause() has run.
+ */
+void helper_msa_ffint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t tmp;
+    wr_t *res = &tmp;
+    void *src = &(env->active_fpu.fpr[ws]);
+    void *dst = &(env->active_fpu.fpr[wd]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(W(res, i), from_uint32, W(src, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(D(res, i), from_uint64, D(src, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and doubleword formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(dst, res);
+}
+
+/*
+ * FTINT_S.df helper.  Applies MSA_FLOAT_UNOP0 with the to_int32
+ * (df == DF_WORD) or to_int64 (df == DF_DOUBLE) operation to every
+ * element of vector register ws; any other df trips the assert.  The
+ * result is moved into wd after check_msacsr_cause() has run.
+ */
+void helper_msa_ftint_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t tmp;
+    wr_t *res = &tmp;
+    void *src = &(env->active_fpu.fpr[ws]);
+    void *dst = &(env->active_fpu.fpr[wd]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP0(W(res, i), to_int32, W(src, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP0(D(res, i), to_int64, D(src, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and doubleword formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(dst, res);
+}
+
+/*
+ * FTINT_U.df helper.  Applies MSA_FLOAT_UNOP0 with the to_uint32
+ * (df == DF_WORD) or to_uint64 (df == DF_DOUBLE) operation to every
+ * element of vector register ws; any other df trips the assert.  The
+ * result is moved into wd after check_msacsr_cause() has run.
+ */
+void helper_msa_ftint_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t tmp;
+    wr_t *res = &tmp;
+    void *src = &(env->active_fpu.fpr[ws]);
+    void *dst = &(env->active_fpu.fpr[wd]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP0(W(res, i), to_uint32, W(src, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP0(D(res, i), to_uint64, D(src, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and doubleword formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(dst, res);
+}
+
+/*
+ * FTRUNC_S.df helper.  Applies MSA_FLOAT_UNOP0 with the
+ * to_int32_round_to_zero (df == DF_WORD) or to_int64_round_to_zero
+ * (df == DF_DOUBLE) operation to every element of vector register ws;
+ * any other df trips the assert.  The result is moved into wd after
+ * check_msacsr_cause() has run.
+ */
+void helper_msa_ftrunc_s_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t tmp;
+    wr_t *res = &tmp;
+    void *src = &(env->active_fpu.fpr[ws]);
+    void *dst = &(env->active_fpu.fpr[wd]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP0(W(res, i), to_int32_round_to_zero, W(src, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP0(D(res, i), to_int64_round_to_zero, D(src, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and doubleword formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(dst, res);
+}
+
+/*
+ * FTRUNC_U.df helper.  Applies MSA_FLOAT_UNOP0 with the
+ * to_uint32_round_to_zero (df == DF_WORD) or to_uint64_round_to_zero
+ * (df == DF_DOUBLE) operation to every element of vector register ws;
+ * any other df trips the assert.  The result is moved into wd after
+ * check_msacsr_cause() has run.
+ */
+void helper_msa_ftrunc_u_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t tmp;
+    wr_t *res = &tmp;
+    void *src = &(env->active_fpu.fpr[ws]);
+    void *dst = &(env->active_fpu.fpr[wd]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP0(W(res, i), to_uint32_round_to_zero, W(src, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP0(D(res, i), to_uint64_round_to_zero, D(src, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and doubleword formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(dst, res);
+}
+
+/*
+ * FRINT.df helper.  Applies MSA_FLOAT_UNOP with the round_to_int
+ * operation to every word (df == DF_WORD) or doubleword
+ * (df == DF_DOUBLE) element of vector register ws; any other df trips
+ * the assert.  The result is moved into wd after check_msacsr_cause()
+ * has run.
+ */
+void helper_msa_frint_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t tmp;
+    wr_t *res = &tmp;
+    void *src = &(env->active_fpu.fpr[ws]);
+    void *dst = &(env->active_fpu.fpr[wd]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(W(res, i), round_to_int, W(src, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(D(res, i), round_to_int, D(src, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and doubleword formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(dst, res);
+}
+
+/*
+ * Convert a 16-bit fixed-point value to single precision: the raw
+ * integer is converted with int32_to_float32() and the result is then
+ * scaled by 2^-15 with float32_scalbn().
+ */
+static inline float32 float32_from_q16(int16_t a STATUS_PARAM)
+{
+    float32 as_integer = int32_to_float32(a STATUS_VAR);
+
+    return float32_scalbn(as_integer, -15 STATUS_VAR);
+}
+
+/*
+ * Convert a 32-bit fixed-point value to double precision: the raw
+ * integer is converted with int32_to_float64() and the result is then
+ * scaled by 2^-31 with float64_scalbn().
+ */
+static inline float64 float64_from_q32(int32 a STATUS_PARAM)
+{
+    float64 as_integer = int32_to_float64(a STATUS_VAR);
+
+    return float64_scalbn(as_integer, -31 STATUS_VAR);
+}
+
 static inline int16_t float32_to_q16(float32 a STATUS_PARAM)
 {
     int32 q_val;
@@ -4576,6 +5118,58 @@ static inline int32 float64_to_q32(float64 a STATUS_PARAM)
     return (int32)q_val;
 }
 
+/*
+ * FFQL.df helper.  Converts fixed-point elements of vector register ws
+ * to floating point with MSA_FLOAT_UNOP: the from_q16 operation on the
+ * elements selected by HL() for df == DF_WORD, the from_q32 operation on
+ * the elements selected by WL() for df == DF_DOUBLE.  Any other df trips
+ * the assert.  The result is built in a temporary and then moved into
+ * wd; this helper neither clears nor checks the MSACSR cause bits.
+ */
+void helper_msa_ffql_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t tmp;
+    wr_t *res = &tmp;
+    void *src = &(env->active_fpu.fpr[ws]);
+    void *dst = &(env->active_fpu.fpr[wd]);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(W(res, i), from_q16, HL(src, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(D(res, i), from_q32, WL(src, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and doubleword formats are expected here */
+        assert(0);
+    }
+
+    msa_move_v(dst, res);
+}
+
+/*
+ * FFQR.df helper.  Converts fixed-point elements of vector register ws
+ * to floating point with MSA_FLOAT_UNOP: the from_q16 operation on the
+ * elements selected by HR() for df == DF_WORD, the from_q32 operation on
+ * the elements selected by WR() for df == DF_DOUBLE.  Any other df trips
+ * the assert.  The result is built in a temporary and then moved into
+ * wd; this helper neither clears nor checks the MSACSR cause bits.
+ */
+void helper_msa_ffqr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t tmp;
+    wr_t *res = &tmp;
+    void *src = &(env->active_fpu.fpr[ws]);
+    void *dst = &(env->active_fpu.fpr[wd]);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(W(res, i), from_q16, HR(src, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_UNOP(D(res, i), from_q32, WR(src, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and doubleword formats are expected here */
+        assert(0);
+    }
+
+    msa_move_v(dst, res);
+}
+
 void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
         uint32_t ws, uint32_t wt)
 {
@@ -4608,6 +5202,68 @@ void helper_msa_ftq_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
 
     msa_move_v(pwd, pwx);
 }
+
+/*
+ * FRCP.df helper.  Runs MSA_FLOAT_RECIPROCAL on every word
+ * (df == DF_WORD) or doubleword (df == DF_DOUBLE) element of vector
+ * register ws; any other df trips the assert.  The result is built in a
+ * temporary and moved into wd after check_msacsr_cause() has run.
+ */
+void helper_msa_frcp_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t tmp;
+    wr_t *res = &tmp;
+    void *src = &(env->active_fpu.fpr[ws]);
+    void *dst = &(env->active_fpu.fpr[wd]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_RECIPROCAL(W(res, i), W(src, i), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_RECIPROCAL(D(res, i), D(src, i), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and doubleword formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(dst, res);
+}
+
+/*
+ * FRSQRT.df helper.  For every word (df == DF_WORD) or doubleword
+ * (df == DF_DOUBLE) element of vector register ws, hands the
+ * float32_sqrt()/float64_sqrt() of the element to MSA_FLOAT_RECIPROCAL;
+ * any other df trips the assert.  The result is built in a temporary and
+ * moved into wd after check_msacsr_cause() has run.
+ */
+void helper_msa_frsqrt_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+        uint32_t ws)
+{
+    wr_t tmp;
+    wr_t *res = &tmp;
+    void *src = &(env->active_fpu.fpr[ws]);
+    void *dst = &(env->active_fpu.fpr[wd]);
+
+    clear_msacsr_cause(env);
+
+    if (df == DF_WORD) {
+        ALL_W_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_RECIPROCAL(W(res, i), float32_sqrt(W(src, i),
+                    &env->active_msa.fp_status), 32);
+        } DONE_ALL_ELEMENTS;
+    } else if (df == DF_DOUBLE) {
+        ALL_D_ELEMENTS(i, MSA_WRLEN) {
+            MSA_FLOAT_RECIPROCAL(D(res, i), float64_sqrt(D(src, i),
+                    &env->active_msa.fp_status), 64);
+        } DONE_ALL_ELEMENTS;
+    } else {
+        /* only the word and doubleword formats are expected here */
+        assert(0);
+    }
+
+    check_msacsr_cause(env);
+    msa_move_v(dst, res);
+}
+
 
 target_ulong helper_msa_cfcmsa(CPUMIPSState *env, uint32_t cs)
 {
diff --git a/target-mips/translate.c b/target-mips/translate.c
index 4185019..dcfe830 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -15737,6 +15737,82 @@ static void gen_msa_vec(CPUMIPSState *env, DisasContext *ctx)
         }
         break;
     case OPC_MSA_2RF:
+        {
+            /*
+             * 2RF format: the one-bit df field at [16:16] is offset by 2
+             * to give the data-format value passed to the floating-point
+             * helpers.  Each case checks MSA access for ws/wd and then
+             * emits the call to the matching helper.
+             */
+            uint8_t df = ((ctx->opcode >> 16) & 0x1) + 2;
+            TCGv_i32 tdf = tcg_const_i32(df);
+
+            switch (MASK_MSA_2RF(opcode)) {
+            case OPC_MSA_FCLASS_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_fclass_df(cpu_env, tdf, twd, tws);
+                break;
+
+            /* float -> integer, truncating */
+            case OPC_MSA_FTRUNC_S_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_ftrunc_s_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_FTRUNC_U_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_ftrunc_u_df(cpu_env, tdf, twd, tws);
+                break;
+
+            /* square root, reciprocals, rounding and log2 */
+            case OPC_MSA_FSQRT_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_fsqrt_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_FRSQRT_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_frsqrt_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_FRCP_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_frcp_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_FRINT_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_frint_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_FRLOG2_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_flog2_df(cpu_env, tdf, twd, tws);
+                break;
+
+            /* widening floating-point conversions */
+            case OPC_MSA_FEXUPL_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_fexupl_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_FEXUPR_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_fexupr_df(cpu_env, tdf, twd, tws);
+                break;
+
+            /* fixed point -> float */
+            case OPC_MSA_FFQL_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_ffql_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_FFQR_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_ffqr_df(cpu_env, tdf, twd, tws);
+                break;
+
+            /* float <-> integer */
+            case OPC_MSA_FINT_S_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_ftint_s_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_FINT_U_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_ftint_u_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_FFINT_S_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_ffint_s_df(cpu_env, tdf, twd, tws);
+                break;
+            case OPC_MSA_FFINT_U_df:
+                check_msa_access(env, ctx, -1, ws, wd);
+                gen_helper_msa_ffint_u_df(cpu_env, tdf, twd, tws);
+                break;
+            }
+
+            tcg_temp_free_i32(tdf);
+        }
+        break;
     default:
         break;
     }
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 18/20] target-mips: add MSA MI10 format instructions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (16 preceding siblings ...)
  2014-07-14  9:56 ` [Qemu-devel] [PATCH 17/20] target-mips: add MSA 2RF " Yongbok Kim
@ 2014-07-14  9:56 ` Yongbok Kim
  2014-07-14  9:56 ` [Qemu-devel] [PATCH 19/20] disas/mips.c: disassemble MSA instructions Yongbok Kim
  2014-07-14  9:56 ` [Qemu-devel] [PATCH 20/20] target-mips: add MSA support to mips32r5-generic Yongbok Kim
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:56 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

add MSA MI10 format instructions

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/helper.h     |    2 +
 target-mips/msa_helper.c |   75 ++++++++++++++++++++++++++++++++++++++++++++++
 target-mips/translate.c  |   43 ++++++++++++++++++++++++++
 3 files changed, 120 insertions(+), 0 deletions(-)

diff --git a/target-mips/helper.h b/target-mips/helper.h
index c86bd36..89ca4d1 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -806,6 +806,7 @@ DEF_HELPER_5(msa_ilvod_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_ilvr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_insert_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_insve_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_ld_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_4(msa_ldi_df, void, env, i32, i32, i32)
 DEF_HELPER_5(msa_madd_q_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_maddr_q_df, void, env, i32, i32, i32, i32)
@@ -855,6 +856,7 @@ DEF_HELPER_5(msa_srl_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srli_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srlr_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_srlri_df, void, env, i32, i32, i32, i32)
+DEF_HELPER_5(msa_st_df, void, env, i32, i32, i32, s64)
 DEF_HELPER_5(msa_subs_s_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_subs_u_df, void, env, i32, i32, i32, i32)
 DEF_HELPER_5(msa_subsus_u_df, void, env, i32, i32, i32, i32)
diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
index d152953..9827dfd 100644
--- a/target-mips/msa_helper.c
+++ b/target-mips/msa_helper.c
@@ -3240,6 +3240,81 @@ void helper_msa_msubr_q_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+/*
+ * Load one element of df_bits (8, 16, 32 or 64) bits from guest address
+ * addr through the matching do_ldN() helper, passing the KSU bits of
+ * env->hflags as its last argument.  Returns 0 for any other width.
+ */
+static inline int64_t msa_ld_df(CPUMIPSState *env, uint32_t df_bits,
+        target_ulong addr)
+{
+    if (df_bits == 8) {
+        return  do_ld8(env, addr, env->hflags & MIPS_HFLAG_KSU);
+    }
+    if (df_bits == 16) {
+        return  do_ld16(env, addr, env->hflags & MIPS_HFLAG_KSU);
+    }
+    if (df_bits == 32) {
+        return (int64_t) do_ld32(env, addr, env->hflags & MIPS_HFLAG_KSU);
+    }
+    if (df_bits == 64) {
+        return (int64_t) do_ld64(env, addr, env->hflags & MIPS_HFLAG_KSU);
+    }
+    return 0;
+}
+
+/*
+ * LD.df helper: fill vector register wd with consecutive elements read
+ * from guest memory.
+ *
+ * df  - data-format code; elements are 8 << df bits wide
+ * wd  - destination vector register number
+ * rs  - general-purpose register holding the base address
+ * s10 - signed offset in units of the element size
+ *
+ * Element i is read from gpr[rs] + s10 * element_bytes + i * element_bytes.
+ * When MSAIR_WRP_BIT is set in MSAIR, the bit for wd is also set in
+ * msamodify.
+ */
+void helper_msa_ld_df(CPUMIPSState *env, uint32_t df, uint32_t wd, uint32_t rs,
+        int64_t s10)
+{
+    int64_t td;
+    int df_bits = 8 * (1 << df);
+    int i;
+    target_ulong addr;
+    /* scale by multiplying: s10 may be negative, and left-shifting a
+       negative value is undefined behaviour in C */
+    int16_t offset = s10 * (1 << df);
+
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        addr = env->active_tc.gpr[rs] + offset + (i << df);
+        td = msa_ld_df(env, df_bits, addr);
+        msa_store_wr_elem(env, td, wd, df, i);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* register numbers go up to 31, so shift an unsigned constant:
+           1 << 31 would overflow a signed int */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
+/*
+ * Store val as one element of df_bits (8, 16, 32 or 64) bits at guest
+ * address addr through the matching do_stN() helper, passing the KSU
+ * bits of env->hflags as its last argument.  Any other width is
+ * silently ignored.
+ */
+static inline void msa_st_df(CPUMIPSState *env, uint32_t df_bits,
+        target_ulong addr, int64_t val)
+{
+    if (df_bits == 8) {
+        do_st8(env, addr, val, env->hflags & MIPS_HFLAG_KSU);
+    } else if (df_bits == 16) {
+        do_st16(env, addr, val, env->hflags & MIPS_HFLAG_KSU);
+    } else if (df_bits == 32) {
+        do_st32(env, addr, val, env->hflags & MIPS_HFLAG_KSU);
+    } else if (df_bits == 64) {
+        do_st64(env, addr, val, env->hflags & MIPS_HFLAG_KSU);
+    }
+}
+
+/*
+ * ST.df helper: write every element of vector register wd to consecutive
+ * guest memory locations.
+ *
+ * df  - data-format code; elements are 8 << df bits wide
+ * wd  - source vector register number
+ * rs  - general-purpose register holding the base address
+ * s10 - signed offset in units of the element size
+ *
+ * Element i is written to gpr[rs] + s10 * element_bytes + i * element_bytes.
+ */
+void helper_msa_st_df(CPUMIPSState *env, uint32_t df, uint32_t wd, uint32_t rs,
+        int64_t s10)
+{
+    int64_t td;
+    int df_bits = 8 * (1 << df);
+    int i;
+    target_ulong addr;
+    /* scale by multiplying: s10 may be negative, and left-shifting a
+       negative value is undefined behaviour in C */
+    int16_t offset = s10 * (1 << df);
+
+    for (i = 0; i < MSA_WRLEN / df_bits; i++) {
+        addr = env->active_tc.gpr[rs] + offset + (i << df);
+        td = msa_load_wr_elem_i64(env, wd, df, i);
+        msa_st_df(env, df_bits, addr, td);
+    }
+
+    if (env->active_msa.msair & MSAIR_WRP_BIT) {
+        /* NOTE(review): a store only reads wd, yet it is flagged in
+           msamodify exactly like the load helper does -- confirm this is
+           intended.  Behaviour is kept; only the shift constant is made
+           unsigned so that wd == 31 does not overflow a signed int. */
+        env->active_msa.msamodify |= (1U << wd);
+    }
+}
+
 #define FLOAT_ONE32 make_float32(0x3f8 << 20)
 #define FLOAT_ONE64 make_float64(0x3ffULL << 52)
 
diff --git a/target-mips/translate.c b/target-mips/translate.c
index dcfe830..7047248 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -15862,6 +15862,49 @@ static void gen_msa(CPUMIPSState *env, DisasContext *ctx)
     case OPC_MSA_VEC:
         gen_msa_vec(env, ctx);
         break;
+    case OPC_MSA_LD_B:
+    case OPC_MSA_LD_H:
+    case OPC_MSA_LD_W:
+    case OPC_MSA_LD_D:
+    case OPC_MSA_ST_B:
+    case OPC_MSA_ST_H:
+    case OPC_MSA_ST_W:
+    case OPC_MSA_ST_D:
+        {
+            /* MI10 format fields */
+            uint8_t rs = (ctx->opcode >> 11) & 0x1f /* rs [15:11] */;
+            uint8_t wd = (ctx->opcode >> 6) & 0x1f /* wd [10:6] */;
+            uint8_t df = (ctx->opcode >> 0) & 0x3 /* df [1:0] */;
+            /*
+             * s10 [25:16], sign extended to 64 bits.  Flipping the sign
+             * bit and then subtracting its weight gives the same result
+             * as the shift-left/shift-right idiom without ever shifting a
+             * value into the sign bit of a signed integer, which is
+             * undefined behaviour in C.
+             */
+            int64_t s10 = (int64_t)(((ctx->opcode >> 16) & 0x3ff) ^ 0x200)
+                          - 0x200;
+
+            TCGv_i32 tdf = tcg_const_i32(df);
+            TCGv_i32 twd = tcg_const_i32(wd);
+            TCGv_i32 trs = tcg_const_i32(rs);
+            TCGv_i64 ts10 = tcg_const_i64(s10);
+
+            /* loads and stores take the same operands and differ only in
+               the helper that is called */
+            switch (MASK_MSA_MINOR(opcode)) {
+            case OPC_MSA_LD_B:
+            case OPC_MSA_LD_H:
+            case OPC_MSA_LD_W:
+            case OPC_MSA_LD_D:
+                check_msa_access(env, ctx, -1, -1, wd);
+                gen_helper_msa_ld_df(cpu_env, tdf, twd, trs, ts10);
+                break;
+            case OPC_MSA_ST_B:
+            case OPC_MSA_ST_H:
+            case OPC_MSA_ST_W:
+            case OPC_MSA_ST_D:
+                check_msa_access(env, ctx, -1, -1, wd);
+                gen_helper_msa_st_df(cpu_env, tdf, twd, trs, ts10);
+                break;
+            }
+
+            tcg_temp_free_i32(twd);
+            tcg_temp_free_i32(tdf);
+            tcg_temp_free_i32(trs);
+            tcg_temp_free_i64(ts10);
+        }
+        break;
     default:
         MIPS_INVAL("MSA instruction");
         generate_exception(ctx, EXCP_RI);
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 19/20] disas/mips.c: disassemble MSA instructions
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (17 preceding siblings ...)
  2014-07-14  9:56 ` [Qemu-devel] [PATCH 18/20] target-mips: add MSA MI10 " Yongbok Kim
@ 2014-07-14  9:56 ` Yongbok Kim
  2014-07-14  9:56 ` [Qemu-devel] [PATCH 20/20] target-mips: add MSA support to mips32r5-generic Yongbok Kim
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:56 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

disassemble MIPS SIMD Architecture instructions

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 disas/mips.c |  721 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 719 insertions(+), 2 deletions(-)

diff --git a/disas/mips.c b/disas/mips.c
index 2106b57..65781f7 100644
--- a/disas/mips.c
+++ b/disas/mips.c
@@ -218,6 +218,28 @@ see <http://www.gnu.org/licenses/>.  */
 #define OP_SH_MTACC_D		13
 #define OP_MASK_MTACC_D		0x3
 
+/* MSA */
+#define OP_MASK_1BIT            0x1
+#define OP_SH_1BIT              16
+#define OP_MASK_2BIT            0x3
+#define OP_SH_2BIT              16
+#define OP_MASK_3BIT            0x7
+#define OP_SH_3BIT              16
+#define OP_MASK_4BIT            0xf
+#define OP_SH_4BIT              16
+#define OP_MASK_5BIT            0x1f
+#define OP_SH_5BIT              16
+#define OP_MASK_10BIT           0x3ff
+#define OP_SH_10BIT             11
+#define OP_MASK_MSACR11         0x1f
+#define OP_SH_MSACR11           11
+#define OP_MASK_MSACR6          0x1f
+#define OP_SH_MSACR6            6
+#define OP_MASK_GPR             0x1f
+#define OP_SH_GPR               6
+#define OP_MASK_1_TO_4          0x3
+#define OP_SH_1_TO_4            6
+
 #define	OP_OP_COP0		0x10
 #define	OP_OP_COP1		0x11
 #define	OP_OP_COP2		0x12
@@ -502,6 +524,9 @@ struct mips_opcode
 /* Instruction writes MDMX accumulator. */
 #define INSN2_WRITE_MDMX_ACC	    0x00000004
 
+/* Reads the general purpose register in OP_*_RD.  */
+#define INSN2_READ_GPR_D    0x00000200
+
 /* Instruction is actually a macro.  It should be ignored by the
    disassembler, and requires special treatment by the assembler.  */
 #define INSN_MACRO                  0xffffffff
@@ -557,7 +582,12 @@ struct mips_opcode
 #define INSN_5500		  0x02000000
 
 /* MDMX ASE */
-#define INSN_MDMX                 0x04000000
+#define INSN_MDMX                 0x00000000    /* Deprecated */
+
+/* MIPS MSA Extension */
+#define INSN_MSA                  0x04000000
+#define INSN_MSA64                0x04000000
+
 /* MT ASE */
 #define INSN_MT                   0x08000000
 /* SmartMIPS ASE  */
@@ -1190,6 +1220,17 @@ extern const int bfd_mips16_num_opcodes;
 /* MIPS MT ASE support.  */
 #define MT32	INSN_MT
 
+/* MSA */
+#define MSA     INSN_MSA
+#define MSA64   INSN_MSA64
+#define WR_VD   INSN_WRITE_FPR_D    /* Reuse INSN_WRITE_FPR_D */
+#define RD_VD   WR_VD               /* Reuse WR_VD */
+#define RD_VT   INSN_READ_FPR_T     /* Reuse INSN_READ_FPR_T */
+#define RD_VS   INSN_READ_FPR_S     /* Reuse INSN_READ_FPR_S */
+#define RD_d    INSN2_READ_GPR_D    /* Reuse INSN2_READ_GPR_D */
+
+#define RD_rd6  0
+
 /* The order of overloaded instructions matters.  Label arguments and
    register arguments look the same. Instructions that can have either
    for arguments must apear in the correct order in this table for the
@@ -1209,6 +1250,538 @@ const struct mips_opcode mips_builtin_opcodes[] =
    them first.  The assemblers uses a hash table based on the
    instruction name anyhow.  */
 /* name,    args,	match,	    mask,	pinfo,          	membership */
+/* MSA */
+{"sll.b",   "+d,+e,+f", 0x7800000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"sll.h",   "+d,+e,+f", 0x7820000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"sll.w",   "+d,+e,+f", 0x7840000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"sll.d",   "+d,+e,+f", 0x7860000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"slli.b",  "+d,+e,+7", 0x78700009, 0xfff8003f, WR_VD|RD_VS,        0, MSA},
+{"slli.h",  "+d,+e,+8", 0x78600009, 0xfff0003f, WR_VD|RD_VS,        0, MSA},
+{"slli.w",  "+d,+e,+9", 0x78400009, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"slli.d",  "+d,+e,'",  0x78000009, 0xffc0003f, WR_VD|RD_VS,        0, MSA},
+{"sra.b",   "+d,+e,+f", 0x7880000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"sra.h",   "+d,+e,+f", 0x78a0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"sra.w",   "+d,+e,+f", 0x78c0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"sra.d",   "+d,+e,+f", 0x78e0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srai.b",  "+d,+e,+7", 0x78f00009, 0xfff8003f, WR_VD|RD_VS,        0, MSA},
+{"srai.h",  "+d,+e,+8", 0x78e00009, 0xfff0003f, WR_VD|RD_VS,        0, MSA},
+{"srai.w",  "+d,+e,+9", 0x78c00009, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"srai.d",  "+d,+e,'",  0x78800009, 0xffc0003f, WR_VD|RD_VS,        0, MSA},
+{"srl.b",   "+d,+e,+f", 0x7900000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srl.h",   "+d,+e,+f", 0x7920000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srl.w",   "+d,+e,+f", 0x7940000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srl.d",   "+d,+e,+f", 0x7960000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srli.b",  "+d,+e,+7", 0x79700009, 0xfff8003f, WR_VD|RD_VS,        0, MSA},
+{"srli.h",  "+d,+e,+8", 0x79600009, 0xfff0003f, WR_VD|RD_VS,        0, MSA},
+{"srli.w",  "+d,+e,+9", 0x79400009, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"srli.d",  "+d,+e,'",  0x79000009, 0xffc0003f, WR_VD|RD_VS,        0, MSA},
+{"bclr.b",  "+d,+e,+f", 0x7980000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bclr.h",  "+d,+e,+f", 0x79a0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bclr.w",  "+d,+e,+f", 0x79c0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bclr.d",  "+d,+e,+f", 0x79e0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bclri.b", "+d,+e,+7", 0x79f00009, 0xfff8003f, WR_VD|RD_VS,        0, MSA},
+{"bclri.h", "+d,+e,+8", 0x79e00009, 0xfff0003f, WR_VD|RD_VS,        0, MSA},
+{"bclri.w", "+d,+e,+9", 0x79c00009, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"bclri.d", "+d,+e,'",  0x79800009, 0xffc0003f, WR_VD|RD_VS,        0, MSA},
+{"bset.b",  "+d,+e,+f", 0x7a00000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bset.h",  "+d,+e,+f", 0x7a20000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bset.w",  "+d,+e,+f", 0x7a40000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bset.d",  "+d,+e,+f", 0x7a60000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bseti.b", "+d,+e,+7", 0x7a700009, 0xfff8003f, WR_VD|RD_VS,        0, MSA},
+{"bseti.h", "+d,+e,+8", 0x7a600009, 0xfff0003f, WR_VD|RD_VS,        0, MSA},
+{"bseti.w", "+d,+e,+9", 0x7a400009, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"bseti.d", "+d,+e,'",  0x7a000009, 0xffc0003f, WR_VD|RD_VS,        0, MSA},
+{"bneg.b",  "+d,+e,+f", 0x7a80000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bneg.h",  "+d,+e,+f", 0x7aa0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bneg.w",  "+d,+e,+f", 0x7ac0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bneg.d",  "+d,+e,+f", 0x7ae0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bnegi.b", "+d,+e,+7", 0x7af00009, 0xfff8003f, WR_VD|RD_VS,        0, MSA},
+{"bnegi.h", "+d,+e,+8", 0x7ae00009, 0xfff0003f, WR_VD|RD_VS,        0, MSA},
+{"bnegi.w", "+d,+e,+9", 0x7ac00009, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"bnegi.d", "+d,+e,'",  0x7a800009, 0xffc0003f, WR_VD|RD_VS,        0, MSA},
+{"binsl.b", "+d,+e,+f", 0x7b00000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"binsl.h", "+d,+e,+f", 0x7b20000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"binsl.w", "+d,+e,+f", 0x7b40000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"binsl.d", "+d,+e,+f", 0x7b60000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"binsli.b", "+d,+e,+7", 0x7b700009, 0xfff8003f, WR_VD|RD_VS,       0, MSA},
+{"binsli.h", "+d,+e,+8", 0x7b600009, 0xfff0003f, WR_VD|RD_VS,       0, MSA},
+{"binsli.w", "+d,+e,+9", 0x7b400009, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"binsli.d", "+d,+e,'",  0x7b000009, 0xffc0003f, WR_VD|RD_VS,       0, MSA},
+{"binsr.b", "+d,+e,+f", 0x7b80000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"binsr.h", "+d,+e,+f", 0x7ba0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"binsr.w", "+d,+e,+f", 0x7bc0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"binsr.d", "+d,+e,+f", 0x7be0000d, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"binsri.b", "+d,+e,+7", 0x7bf00009, 0xfff8003f, WR_VD|RD_VS,       0, MSA},
+{"binsri.h", "+d,+e,+8", 0x7be00009, 0xfff0003f, WR_VD|RD_VS,       0, MSA},
+{"binsri.w", "+d,+e,+9", 0x7bc00009, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"binsri.d", "+d,+e,'",  0x7b800009, 0xffc0003f, WR_VD|RD_VS,       0, MSA},
+{"addv.b",  "+d,+e,+f", 0x7800000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"addv.h",  "+d,+e,+f", 0x7820000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"addv.w",  "+d,+e,+f", 0x7840000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"addv.d",  "+d,+e,+f", 0x7860000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"addvi.b", "+d,+e,k",  0x78000006, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"addvi.h", "+d,+e,k",  0x78200006, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"addvi.w", "+d,+e,k",  0x78400006, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"addvi.d", "+d,+e,k",  0x78600006, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"subv.b",  "+d,+e,+f", 0x7880000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"subv.h",  "+d,+e,+f", 0x78a0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"subv.w",  "+d,+e,+f", 0x78c0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"subv.d",  "+d,+e,+f", 0x78e0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"subvi.b", "+d,+e,k",  0x78800006, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"subvi.h", "+d,+e,k",  0x78a00006, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"subvi.w", "+d,+e,k",  0x78c00006, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"subvi.d", "+d,+e,k",  0x78e00006, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"max_s.b", "+d,+e,+f", 0x7900000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"max_s.h", "+d,+e,+f", 0x7920000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"max_s.w", "+d,+e,+f", 0x7940000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"max_s.d", "+d,+e,+f", 0x7960000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"maxi_s.b", "+d,+e,+5", 0x79000006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"maxi_s.h", "+d,+e,+5", 0x79200006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"maxi_s.w", "+d,+e,+5", 0x79400006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"maxi_s.d", "+d,+e,+5", 0x79600006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"max_u.b", "+d,+e,+f", 0x7980000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"max_u.h", "+d,+e,+f", 0x79a0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"max_u.w", "+d,+e,+f", 0x79c0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"max_u.d", "+d,+e,+f", 0x79e0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"maxi_u.b", "+d,+e,k",  0x79800006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"maxi_u.h", "+d,+e,k",  0x79a00006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"maxi_u.w", "+d,+e,k",  0x79c00006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"maxi_u.d", "+d,+e,k",  0x79e00006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"min_s.b", "+d,+e,+f", 0x7a00000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"min_s.h", "+d,+e,+f", 0x7a20000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"min_s.w", "+d,+e,+f", 0x7a40000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"min_s.d", "+d,+e,+f", 0x7a60000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mini_s.b", "+d,+e,+5", 0x7a000006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"mini_s.h", "+d,+e,+5", 0x7a200006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"mini_s.w", "+d,+e,+5", 0x7a400006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"mini_s.d", "+d,+e,+5", 0x7a600006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"min_u.b", "+d,+e,+f", 0x7a80000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"min_u.h", "+d,+e,+f", 0x7aa0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"min_u.w", "+d,+e,+f", 0x7ac0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"min_u.d", "+d,+e,+f", 0x7ae0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mini_u.b", "+d,+e,k",  0x7a800006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"mini_u.h", "+d,+e,k",  0x7aa00006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"mini_u.w", "+d,+e,k",  0x7ac00006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"mini_u.d", "+d,+e,k",  0x7ae00006, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"max_a.b", "+d,+e,+f", 0x7b00000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"max_a.h", "+d,+e,+f", 0x7b20000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"max_a.w", "+d,+e,+f", 0x7b40000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"max_a.d", "+d,+e,+f", 0x7b60000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"min_a.b", "+d,+e,+f", 0x7b80000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"min_a.h", "+d,+e,+f", 0x7ba0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"min_a.w", "+d,+e,+f", 0x7bc0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"min_a.d", "+d,+e,+f", 0x7be0000e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ceq.b",   "+d,+e,+f", 0x7800000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ceq.h",   "+d,+e,+f", 0x7820000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ceq.w",   "+d,+e,+f", 0x7840000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ceq.d",   "+d,+e,+f", 0x7860000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ceqi.b",  "+d,+e,+5", 0x78000007, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"ceqi.h",  "+d,+e,+5", 0x78200007, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"ceqi.w",  "+d,+e,+5", 0x78400007, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"ceqi.d",  "+d,+e,+5", 0x78600007, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"clt_s.b", "+d,+e,+f", 0x7900000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"clt_s.h", "+d,+e,+f", 0x7920000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"clt_s.w", "+d,+e,+f", 0x7940000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"clt_s.d", "+d,+e,+f", 0x7960000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"clti_s.b", "+d,+e,+5", 0x79000007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clti_s.h", "+d,+e,+5", 0x79200007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clti_s.w", "+d,+e,+5", 0x79400007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clti_s.d", "+d,+e,+5", 0x79600007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clt_u.b", "+d,+e,+f", 0x7980000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"clt_u.h", "+d,+e,+f", 0x79a0000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"clt_u.w", "+d,+e,+f", 0x79c0000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"clt_u.d", "+d,+e,+f", 0x79e0000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"clti_u.b", "+d,+e,k",  0x79800007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clti_u.h", "+d,+e,k",  0x79a00007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clti_u.w", "+d,+e,k",  0x79c00007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clti_u.d", "+d,+e,k",  0x79e00007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"cle_s.b", "+d,+e,+f", 0x7a00000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"cle_s.h", "+d,+e,+f", 0x7a20000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"cle_s.w", "+d,+e,+f", 0x7a40000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"cle_s.d", "+d,+e,+f", 0x7a60000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"clei_s.b", "+d,+e,+5", 0x7a000007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clei_s.h", "+d,+e,+5", 0x7a200007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clei_s.w", "+d,+e,+5", 0x7a400007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clei_s.d", "+d,+e,+5", 0x7a600007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"cle_u.b", "+d,+e,+f", 0x7a80000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"cle_u.h", "+d,+e,+f", 0x7aa0000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"cle_u.w", "+d,+e,+f", 0x7ac0000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"cle_u.d", "+d,+e,+f", 0x7ae0000f, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"clei_u.b", "+d,+e,k",  0x7a800007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clei_u.h", "+d,+e,k",  0x7aa00007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clei_u.w", "+d,+e,k",  0x7ac00007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"clei_u.d", "+d,+e,k",  0x7ae00007, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"ld.b",    "+d,+^(d)", 0x78000020, 0xfc00003f, WR_VD|LDD,       RD_d, MSA},
+{"ld.h",    "+d,+#(d)", 0x78000021, 0xfc00003f, WR_VD|LDD,       RD_d, MSA},
+{"ld.w",    "+d,+$(d)", 0x78000022, 0xfc00003f, WR_VD|LDD,       RD_d, MSA},
+{"ld.d",    "+d,+%(d)", 0x78000023, 0xfc00003f, WR_VD|LDD,       RD_d, MSA},
+{"st.b",    "+d,+^(d)", 0x78000024, 0xfc00003f, RD_VD|SM,        RD_d, MSA},
+{"st.h",    "+d,+#(d)", 0x78000025, 0xfc00003f, RD_VD|SM,        RD_d, MSA},
+{"st.w",    "+d,+$(d)", 0x78000026, 0xfc00003f, RD_VD|SM,        RD_d, MSA},
+{"st.d",    "+d,+%(d)", 0x78000027, 0xfc00003f, RD_VD|SM,        RD_d, MSA},
+{"sat_s.b", "+d,+e,+7", 0x7870000a, 0xfff8003f, WR_VD|RD_VS,        0, MSA},
+{"sat_s.h", "+d,+e,+8", 0x7860000a, 0xfff0003f, WR_VD|RD_VS,        0, MSA},
+{"sat_s.w", "+d,+e,+9", 0x7840000a, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"sat_s.d", "+d,+e,'",  0x7800000a, 0xffc0003f, WR_VD|RD_VS,        0, MSA},
+{"sat_u.b", "+d,+e,+7", 0x78f0000a, 0xfff8003f, WR_VD|RD_VS,        0, MSA},
+{"sat_u.h", "+d,+e,+8", 0x78e0000a, 0xfff0003f, WR_VD|RD_VS,        0, MSA},
+{"sat_u.w", "+d,+e,+9", 0x78c0000a, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"sat_u.d", "+d,+e,'",  0x7880000a, 0xffc0003f, WR_VD|RD_VS,        0, MSA},
+{"add_a.b", "+d,+e,+f", 0x78000010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"add_a.h", "+d,+e,+f", 0x78200010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"add_a.w", "+d,+e,+f", 0x78400010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"add_a.d", "+d,+e,+f", 0x78600010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"adds_a.b", "+d,+e,+f", 0x78800010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"adds_a.h", "+d,+e,+f", 0x78a00010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"adds_a.w", "+d,+e,+f", 0x78c00010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"adds_a.d", "+d,+e,+f", 0x78e00010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"adds_s.b", "+d,+e,+f", 0x79000010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"adds_s.h", "+d,+e,+f", 0x79200010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"adds_s.w", "+d,+e,+f", 0x79400010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"adds_s.d", "+d,+e,+f", 0x79600010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"adds_u.b", "+d,+e,+f", 0x79800010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"adds_u.h", "+d,+e,+f", 0x79a00010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"adds_u.w", "+d,+e,+f", 0x79c00010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"adds_u.d", "+d,+e,+f", 0x79e00010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"ave_s.b", "+d,+e,+f", 0x7a000010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ave_s.h", "+d,+e,+f", 0x7a200010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ave_s.w", "+d,+e,+f", 0x7a400010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ave_s.d", "+d,+e,+f", 0x7a600010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ave_u.b", "+d,+e,+f", 0x7a800010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ave_u.h", "+d,+e,+f", 0x7aa00010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ave_u.w", "+d,+e,+f", 0x7ac00010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ave_u.d", "+d,+e,+f", 0x7ae00010, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"aver_s.b", "+d,+e,+f", 0x7b000010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"aver_s.h", "+d,+e,+f", 0x7b200010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"aver_s.w", "+d,+e,+f", 0x7b400010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"aver_s.d", "+d,+e,+f", 0x7b600010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"aver_u.b", "+d,+e,+f", 0x7b800010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"aver_u.h", "+d,+e,+f", 0x7ba00010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"aver_u.w", "+d,+e,+f", 0x7bc00010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"aver_u.d", "+d,+e,+f", 0x7be00010, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subs_s.b", "+d,+e,+f", 0x78000011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subs_s.h", "+d,+e,+f", 0x78200011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subs_s.w", "+d,+e,+f", 0x78400011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subs_s.d", "+d,+e,+f", 0x78600011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subs_u.b", "+d,+e,+f", 0x78800011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subs_u.h", "+d,+e,+f", 0x78a00011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subs_u.w", "+d,+e,+f", 0x78c00011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subs_u.d", "+d,+e,+f", 0x78e00011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subsus_u.b", "+d,+e,+f", 0x79000011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subsus_u.h", "+d,+e,+f", 0x79200011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subsus_u.w", "+d,+e,+f", 0x79400011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subsus_u.d", "+d,+e,+f", 0x79600011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subsuu_s.b", "+d,+e,+f", 0x79800011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subsuu_s.h", "+d,+e,+f", 0x79a00011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subsuu_s.w", "+d,+e,+f", 0x79c00011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"subsuu_s.d", "+d,+e,+f", 0x79e00011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"asub_s.b", "+d,+e,+f", 0x7a000011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"asub_s.h", "+d,+e,+f", 0x7a200011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"asub_s.w", "+d,+e,+f", 0x7a400011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"asub_s.d", "+d,+e,+f", 0x7a600011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"asub_u.b", "+d,+e,+f", 0x7a800011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"asub_u.h", "+d,+e,+f", 0x7aa00011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"asub_u.w", "+d,+e,+f", 0x7ac00011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"asub_u.d", "+d,+e,+f", 0x7ae00011, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"mulv.b",  "+d,+e,+f", 0x78000012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mulv.h",  "+d,+e,+f", 0x78200012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mulv.w",  "+d,+e,+f", 0x78400012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mulv.d",  "+d,+e,+f", 0x78600012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"maddv.b", "+d,+e,+f", 0x78800012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"maddv.h", "+d,+e,+f", 0x78a00012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"maddv.w", "+d,+e,+f", 0x78c00012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"maddv.d", "+d,+e,+f", 0x78e00012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"msubv.b", "+d,+e,+f", 0x79000012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"msubv.h", "+d,+e,+f", 0x79200012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"msubv.w", "+d,+e,+f", 0x79400012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"msubv.d", "+d,+e,+f", 0x79600012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"div_s.b", "+d,+e,+f", 0x7a000012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"div_s.h", "+d,+e,+f", 0x7a200012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"div_s.w", "+d,+e,+f", 0x7a400012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"div_s.d", "+d,+e,+f", 0x7a600012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"div_u.b", "+d,+e,+f", 0x7a800012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"div_u.h", "+d,+e,+f", 0x7aa00012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"div_u.w", "+d,+e,+f", 0x7ac00012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"div_u.d", "+d,+e,+f", 0x7ae00012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mod_s.b", "+d,+e,+f", 0x7b000012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mod_s.h", "+d,+e,+f", 0x7b200012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mod_s.w", "+d,+e,+f", 0x7b400012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mod_s.d", "+d,+e,+f", 0x7b600012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mod_u.b", "+d,+e,+f", 0x7b800012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mod_u.h", "+d,+e,+f", 0x7ba00012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mod_u.w", "+d,+e,+f", 0x7bc00012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mod_u.d", "+d,+e,+f", 0x7be00012, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"dotp_s.h", "+d,+e,+f", 0x78200013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dotp_s.w", "+d,+e,+f", 0x78400013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dotp_s.d", "+d,+e,+f", 0x78600013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dotp_u.h", "+d,+e,+f", 0x78a00013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dotp_u.w", "+d,+e,+f", 0x78c00013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dotp_u.d", "+d,+e,+f", 0x78e00013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpadd_s.h", "+d,+e,+f", 0x79200013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpadd_s.w", "+d,+e,+f", 0x79400013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpadd_s.d", "+d,+e,+f", 0x79600013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpadd_u.h", "+d,+e,+f", 0x79a00013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpadd_u.w", "+d,+e,+f", 0x79c00013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpadd_u.d", "+d,+e,+f", 0x79e00013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpsub_s.h", "+d,+e,+f", 0x7a200013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpsub_s.w", "+d,+e,+f", 0x7a400013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpsub_s.d", "+d,+e,+f", 0x7a600013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpsub_u.h", "+d,+e,+f", 0x7aa00013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpsub_u.w", "+d,+e,+f", 0x7ac00013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"dpsub_u.d", "+d,+e,+f", 0x7ae00013, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"sld.b",   "+d,+e[t]", 0x78000014, 0xffe0003f, WR_VD|RD_VS|RD_t,   0, MSA},
+{"sld.h",   "+d,+e[t]", 0x78200014, 0xffe0003f, WR_VD|RD_VS|RD_t,   0, MSA},
+{"sld.w",   "+d,+e[t]", 0x78400014, 0xffe0003f, WR_VD|RD_VS|RD_t,   0, MSA},
+{"sld.d",   "+d,+e[t]", 0x78600014, 0xffe0003f, WR_VD|RD_VS|RD_t,   0, MSA},
+{"sldi.b",  "+d,+e[+9]", 0x78000019, 0xffe0003f, WR_VD|RD_VS,       0, MSA},
+{"sldi.h",  "+d,+e[+8]", 0x78200019, 0xfff0003f, WR_VD|RD_VS,       0, MSA},
+{"sldi.w",  "+d,+e[+7]", 0x78300019, 0xfff8003f, WR_VD|RD_VS,       0, MSA},
+{"sldi.d",  "+d,+e[+6]", 0x78380019, 0xfffc003f, WR_VD|RD_VS,       0, MSA},
+{"splat.b", "+d,+e[t]", 0x78800014, 0xffe0003f, WR_VD|RD_VS|RD_t,   0, MSA},
+{"splat.h", "+d,+e[t]", 0x78a00014, 0xffe0003f, WR_VD|RD_VS|RD_t,   0, MSA},
+{"splat.w", "+d,+e[t]", 0x78c00014, 0xffe0003f, WR_VD|RD_VS|RD_t,   0, MSA},
+{"splat.d", "+d,+e[t]", 0x78e00014, 0xffe0003f, WR_VD|RD_VS|RD_t,   0, MSA},
+{"splati.b", "+d,+e[+9]", 0x78400019, 0xffe0003f, WR_VD|RD_VS,      0, MSA},
+{"splati.h", "+d,+e[+8]", 0x78600019, 0xfff0003f, WR_VD|RD_VS,      0, MSA},
+{"splati.w", "+d,+e[+7]", 0x78700019, 0xfff8003f, WR_VD|RD_VS,      0, MSA},
+{"splati.d", "+d,+e[+6]", 0x78780019, 0xfffc003f, WR_VD|RD_VS,      0, MSA},
+{"pckev.b", "+d,+e,+f", 0x79000014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"pckev.h", "+d,+e,+f", 0x79200014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"pckev.w", "+d,+e,+f", 0x79400014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"pckev.d", "+d,+e,+f", 0x79600014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"pckod.b", "+d,+e,+f", 0x79800014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"pckod.h", "+d,+e,+f", 0x79a00014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"pckod.w", "+d,+e,+f", 0x79c00014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"pckod.d", "+d,+e,+f", 0x79e00014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvl.b",  "+d,+e,+f", 0x7a000014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvl.h",  "+d,+e,+f", 0x7a200014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvl.w",  "+d,+e,+f", 0x7a400014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvl.d",  "+d,+e,+f", 0x7a600014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvr.b",  "+d,+e,+f", 0x7a800014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvr.h",  "+d,+e,+f", 0x7aa00014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvr.w",  "+d,+e,+f", 0x7ac00014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvr.d",  "+d,+e,+f", 0x7ae00014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvev.b", "+d,+e,+f", 0x7b000014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvev.h", "+d,+e,+f", 0x7b200014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvev.w", "+d,+e,+f", 0x7b400014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvev.d", "+d,+e,+f", 0x7b600014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvod.b", "+d,+e,+f", 0x7b800014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvod.h", "+d,+e,+f", 0x7ba00014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvod.w", "+d,+e,+f", 0x7bc00014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ilvod.d", "+d,+e,+f", 0x7be00014, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"vshf.b",  "+d,+e,+f", 0x78000015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"vshf.h",  "+d,+e,+f", 0x78200015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"vshf.w",  "+d,+e,+f", 0x78400015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"vshf.d",  "+d,+e,+f", 0x78600015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srar.b",  "+d,+e,+f", 0x78800015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srar.h",  "+d,+e,+f", 0x78a00015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srar.w",  "+d,+e,+f", 0x78c00015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srar.d",  "+d,+e,+f", 0x78e00015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srari.b", "+d,+e,+7", 0x7970000a, 0xfff8003f, WR_VD|RD_VS,        0, MSA},
+{"srari.h", "+d,+e,+8", 0x7960000a, 0xfff0003f, WR_VD|RD_VS,        0, MSA},
+{"srari.w", "+d,+e,+9", 0x7940000a, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"srari.d", "+d,+e,'",  0x7900000a, 0xffc0003f, WR_VD|RD_VS,        0, MSA},
+{"srlr.b",  "+d,+e,+f", 0x79000015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srlr.h",  "+d,+e,+f", 0x79200015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srlr.w",  "+d,+e,+f", 0x79400015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srlr.d",  "+d,+e,+f", 0x79600015, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"srlri.b", "+d,+e,+7", 0x79f0000a, 0xfff8003f, WR_VD|RD_VS,        0, MSA},
+{"srlri.h", "+d,+e,+8", 0x79e0000a, 0xfff0003f, WR_VD|RD_VS,        0, MSA},
+{"srlri.w", "+d,+e,+9", 0x79c0000a, 0xffe0003f, WR_VD|RD_VS,        0, MSA},
+{"srlri.d", "+d,+e,'",  0x7980000a, 0xffc0003f, WR_VD|RD_VS,        0, MSA},
+{"hadd_s.h", "+d,+e,+f", 0x7a200015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"hadd_s.w", "+d,+e,+f", 0x7a400015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"hadd_s.d", "+d,+e,+f", 0x7a600015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"hadd_u.h", "+d,+e,+f", 0x7aa00015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"hadd_u.w", "+d,+e,+f", 0x7ac00015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"hadd_u.d", "+d,+e,+f", 0x7ae00015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"hsub_s.h", "+d,+e,+f", 0x7b200015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"hsub_s.w", "+d,+e,+f", 0x7b400015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"hsub_s.d", "+d,+e,+f", 0x7b600015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"hsub_u.h", "+d,+e,+f", 0x7ba00015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"hsub_u.w", "+d,+e,+f", 0x7bc00015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"hsub_u.d", "+d,+e,+f", 0x7be00015, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"and.v",   "+d,+e,+f", 0x7800001e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"andi.b",  "+d,+e,5",  0x78000000, 0xff00003f, WR_VD|RD_VS,        0, MSA},
+{"or.v",    "+d,+e,+f", 0x7820001e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ori.b",   "+d,+e,5",  0x79000000, 0xff00003f, WR_VD|RD_VS,        0, MSA},
+{"nor.v",   "+d,+e,+f", 0x7840001e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"nori.b",  "+d,+e,5",  0x7a000000, 0xff00003f, WR_VD|RD_VS,        0, MSA},
+{"xor.v",   "+d,+e,+f", 0x7860001e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"xori.b",  "+d,+e,5",  0x7b000000, 0xff00003f, WR_VD|RD_VS,        0, MSA},
+{"bmnz.v",  "+d,+e,+f", 0x7880001e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bmnzi.b", "+d,+e,5",  0x78000001, 0xff00003f, WR_VD|RD_VS,        0, MSA},
+{"bmz.v",   "+d,+e,+f", 0x78a0001e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bmzi.b",  "+d,+e,5",  0x79000001, 0xff00003f, WR_VD|RD_VS,        0, MSA},
+{"bsel.v",  "+d,+e,+f", 0x78c0001e, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"bseli.b", "+d,+e,5",  0x7a000001, 0xff00003f, WR_VD|RD_VS,        0, MSA},
+{"shf.b",   "+d,+e,5",  0x78000002, 0xff00003f, WR_VD|RD_VS,        0, MSA},
+{"shf.h",   "+d,+e,5",  0x79000002, 0xff00003f, WR_VD|RD_VS,        0, MSA},
+{"shf.w",   "+d,+e,5",  0x7a000002, 0xff00003f, WR_VD|RD_VS,        0, MSA},
+{"bnz.v",    "+f,p",    0x45e00000, 0xffe00000, CBD|RD_VT,          0, MSA},
+{"bz.v",    "+f,p",     0x45600000, 0xffe00000, CBD|RD_VT,          0, MSA},
+{"fill.b",  "+d,d",     0x7b00001e, 0xffff003f, WR_VD,           RD_d, MSA},
+{"fill.h",  "+d,d",     0x7b01001e, 0xffff003f, WR_VD,           RD_d, MSA},
+{"fill.w",  "+d,d",     0x7b02001e, 0xffff003f, WR_VD,           RD_d, MSA},
+{"fill.d",  "+d,d",     0x7b03001e, 0xffff003f, WR_VD,           RD_d, MSA64},
+{"pcnt.b",  "+d,+e",    0x7b04001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"pcnt.h",  "+d,+e",    0x7b05001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"pcnt.w",  "+d,+e",    0x7b06001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"pcnt.d",  "+d,+e",    0x7b07001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"nloc.b",  "+d,+e",    0x7b08001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"nloc.h",  "+d,+e",    0x7b09001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"nloc.w",  "+d,+e",    0x7b0a001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"nloc.d",  "+d,+e",    0x7b0b001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"nlzc.b",  "+d,+e",    0x7b0c001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"nlzc.h",  "+d,+e",    0x7b0d001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"nlzc.w",  "+d,+e",    0x7b0e001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"nlzc.d",  "+d,+e",    0x7b0f001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"copy_s.b", "+i,+e[+9]", 0x78800019, 0xffe0003f, RD_VS,       RD_rd6, MSA},
+{"copy_s.h", "+i,+e[+8]", 0x78a00019, 0xfff0003f, RD_VS,       RD_rd6, MSA},
+{"copy_s.w", "+i,+e[+7]", 0x78b00019, 0xfff8003f, RD_VS,       RD_rd6, MSA},
+{"copy_s.d", "+i,+e[+6]", 0x78b80019, 0xfffc003f, RD_VS,       RD_rd6, MSA64},
+{"copy_u.b", "+i,+e[+9]", 0x78c00019, 0xffe0003f, RD_VS,       RD_rd6, MSA},
+{"copy_u.h", "+i,+e[+8]", 0x78e00019, 0xfff0003f, RD_VS,       RD_rd6, MSA},
+{"copy_u.w", "+i,+e[+7]", 0x78f00019, 0xfff8003f, RD_VS,       RD_rd6, MSA},
+{"copy_u.d", "+i,+e[+6]", 0x78f80019, 0xfffc003f, RD_VS,       RD_rd6, MSA64},
+{"insert.b", "+d[+9],d", 0x79000019, 0xffe0003f, WR_VD|RD_VD,    RD_d, MSA},
+{"insert.h", "+d[+8],d", 0x79200019, 0xfff0003f, WR_VD|RD_VD,    RD_d, MSA},
+{"insert.w", "+d[+7],d", 0x79300019, 0xfff8003f, WR_VD|RD_VD,    RD_d, MSA},
+{"insert.d", "+d[+6],d", 0x79380019, 0xfffc003f, WR_VD|RD_VD,    RD_d, MSA64},
+{"insve.b", "+d[+9],+e[+~]", 0x79400019, 0xffe0003f, WR_VD|RD_VD|RD_VS, 0, MSA},
+{"insve.h", "+d[+8],+e[+~]", 0x79600019, 0xfff0003f, WR_VD|RD_VD|RD_VS, 0, MSA},
+{"insve.w", "+d[+7],+e[+~]", 0x79700019, 0xfff8003f, WR_VD|RD_VD|RD_VS, 0, MSA},
+{"insve.d", "+d[+6],+e[+~]", 0x79780019, 0xfffc003f, WR_VD|RD_VD|RD_VS, 0, MSA},
+{"bnz.b",    "+f,p",    0x47800000, 0xffe00000, CBD|RD_VT,          0, MSA},
+{"bnz.h",    "+f,p",    0x47a00000, 0xffe00000, CBD|RD_VT,          0, MSA},
+{"bnz.w",    "+f,p",    0x47c00000, 0xffe00000, CBD|RD_VT,          0, MSA},
+{"bnz.d",    "+f,p",    0x47e00000, 0xffe00000, CBD|RD_VT,          0, MSA},
+{"bz.b",    "+f,p",     0x47000000, 0xffe00000, CBD|RD_VT,          0, MSA},
+{"bz.h",    "+f,p",     0x47200000, 0xffe00000, CBD|RD_VT,          0, MSA},
+{"bz.w",    "+f,p",     0x47400000, 0xffe00000, CBD|RD_VT,          0, MSA},
+{"bz.d",    "+f,p",     0x47600000, 0xffe00000, CBD|RD_VT,          0, MSA},
+{"ldi.b",   "+d,+0",    0x7b000007, 0xffe0003f, WR_VD,              0, MSA},
+{"ldi.h",   "+d,+0",    0x7b200007, 0xffe0003f, WR_VD,              0, MSA},
+{"ldi.w",   "+d,+0",    0x7b400007, 0xffe0003f, WR_VD,              0, MSA},
+{"ldi.d",   "+d,+0",    0x7b600007, 0xffe0003f, WR_VD,              0, MSA},
+{"fcaf.w",  "+d,+e,+f", 0x7800001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcaf.d",  "+d,+e,+f", 0x7820001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcun.w",  "+d,+e,+f", 0x7840001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcun.d",  "+d,+e,+f", 0x7860001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fceq.w",  "+d,+e,+f", 0x7880001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fceq.d",  "+d,+e,+f", 0x78a0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcueq.w", "+d,+e,+f", 0x78c0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcueq.d", "+d,+e,+f", 0x78e0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fclt.w",  "+d,+e,+f", 0x7900001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fclt.d",  "+d,+e,+f", 0x7920001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcult.w", "+d,+e,+f", 0x7940001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcult.d", "+d,+e,+f", 0x7960001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcle.w",  "+d,+e,+f", 0x7980001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcle.d",  "+d,+e,+f", 0x79a0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcule.w", "+d,+e,+f", 0x79c0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcule.d", "+d,+e,+f", 0x79e0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsaf.w",  "+d,+e,+f", 0x7a00001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsaf.d",  "+d,+e,+f", 0x7a20001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsun.w",  "+d,+e,+f", 0x7a40001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsun.d",  "+d,+e,+f", 0x7a60001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fseq.w",  "+d,+e,+f", 0x7a80001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fseq.d",  "+d,+e,+f", 0x7aa0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsueq.w", "+d,+e,+f", 0x7ac0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsueq.d", "+d,+e,+f", 0x7ae0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fslt.w",  "+d,+e,+f", 0x7b00001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fslt.d",  "+d,+e,+f", 0x7b20001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsult.w", "+d,+e,+f", 0x7b40001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsult.d", "+d,+e,+f", 0x7b60001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsle.w",  "+d,+e,+f", 0x7b80001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsle.d",  "+d,+e,+f", 0x7ba0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsule.w", "+d,+e,+f", 0x7bc0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsule.d", "+d,+e,+f", 0x7be0001a, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fadd.w",  "+d,+e,+f", 0x7800001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fadd.d",  "+d,+e,+f", 0x7820001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsub.w",  "+d,+e,+f", 0x7840001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsub.d",  "+d,+e,+f", 0x7860001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fmul.w",  "+d,+e,+f", 0x7880001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fmul.d",  "+d,+e,+f", 0x78a0001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fdiv.w",  "+d,+e,+f", 0x78c0001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fdiv.d",  "+d,+e,+f", 0x78e0001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fmadd.w", "+d,+e,+f", 0x7900001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fmadd.d", "+d,+e,+f", 0x7920001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fmsub.w", "+d,+e,+f", 0x7940001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fmsub.d", "+d,+e,+f", 0x7960001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fexp2.w", "+d,+e,+f", 0x79c0001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fexp2.d", "+d,+e,+f", 0x79e0001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fexdo.h", "+d,+e,+f", 0x7a00001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fexdo.w", "+d,+e,+f", 0x7a20001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ftq.h",   "+d,+e,+f", 0x7a80001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"ftq.w",   "+d,+e,+f", 0x7aa0001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fmin.w",  "+d,+e,+f", 0x7b00001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fmin.d",  "+d,+e,+f", 0x7b20001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fmin_a.w", "+d,+e,+f", 0x7b40001b, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"fmin_a.d", "+d,+e,+f", 0x7b60001b, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"fmax.w",  "+d,+e,+f", 0x7b80001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fmax.d",  "+d,+e,+f", 0x7ba0001b, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fmax_a.w", "+d,+e,+f", 0x7bc0001b, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"fmax_a.d", "+d,+e,+f", 0x7be0001b, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"fcor.w",  "+d,+e,+f", 0x7840001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcor.d",  "+d,+e,+f", 0x7860001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcune.w", "+d,+e,+f", 0x7880001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcune.d", "+d,+e,+f", 0x78a0001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcne.w",  "+d,+e,+f", 0x78c0001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fcne.d",  "+d,+e,+f", 0x78e0001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mul_q.h", "+d,+e,+f", 0x7900001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mul_q.w", "+d,+e,+f", 0x7920001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"madd_q.h", "+d,+e,+f", 0x7940001c, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"madd_q.w", "+d,+e,+f", 0x7960001c, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"msub_q.h", "+d,+e,+f", 0x7980001c, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"msub_q.w", "+d,+e,+f", 0x79a0001c, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"fsor.w",  "+d,+e,+f", 0x7a40001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsor.d",  "+d,+e,+f", 0x7a60001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsune.w", "+d,+e,+f", 0x7a80001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsune.d", "+d,+e,+f", 0x7aa0001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsne.w",  "+d,+e,+f", 0x7ac0001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"fsne.d",  "+d,+e,+f", 0x7ae0001c, 0xffe0003f, WR_VD|RD_VS|RD_VT,  0, MSA},
+{"mulr_q.h", "+d,+e,+f", 0x7b00001c, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"mulr_q.w", "+d,+e,+f", 0x7b20001c, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"maddr_q.h", "+d,+e,+f", 0x7b40001c, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"maddr_q.w", "+d,+e,+f", 0x7b60001c, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"msubr_q.h", "+d,+e,+f", 0x7b80001c, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"msubr_q.w", "+d,+e,+f", 0x7ba0001c, 0xffe0003f, WR_VD|RD_VS|RD_VT, 0, MSA},
+{"fclass.w", "+d,+e",    0x7b20001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"fclass.d", "+d,+e",    0x7b21001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"fsqrt.w", "+d,+e",    0x7b26001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"fsqrt.d", "+d,+e",    0x7b27001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"frsqrt.w", "+d,+e",    0x7b28001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"frsqrt.d", "+d,+e",    0x7b29001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"frcp.w",  "+d,+e",    0x7b2a001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"frcp.d",  "+d,+e",    0x7b2b001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"frint.w", "+d,+e",    0x7b2c001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"frint.d", "+d,+e",    0x7b2d001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"flog2.w", "+d,+e",    0x7b2e001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"flog2.d", "+d,+e",    0x7b2f001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"fexupl.w", "+d,+e",    0x7b30001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"fexupl.d", "+d,+e",    0x7b31001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"fexupr.w", "+d,+e",    0x7b32001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"fexupr.d", "+d,+e",    0x7b33001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"ffql.w",  "+d,+e",    0x7b34001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"ffql.d",  "+d,+e",    0x7b35001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"ffqr.w",  "+d,+e",    0x7b36001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"ffqr.d",  "+d,+e",    0x7b37001e, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+{"ftint_s.w", "+d,+e",   0x7b38001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"ftint_s.d", "+d,+e",   0x7b39001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"ftint_u.w", "+d,+e",   0x7b3a001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"ftint_u.d", "+d,+e",   0x7b3b001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"ffint_s.w", "+d,+e",   0x7b3c001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"ffint_s.d", "+d,+e",   0x7b3d001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"ffint_u.w", "+d,+e",   0x7b3e001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"ffint_u.d", "+d,+e",   0x7b3f001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"ftrunc_s.w", "+d,+e",  0x7b22001e, 0xffff003f, WR_VD|RD_VS,       0, MSA}, /* 2RF minor opcode 0x191, df in bit 16 */
+{"ftrunc_s.d", "+d,+e",  0x7b23001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"ftrunc_u.w", "+d,+e",  0x7b24001e, 0xffff003f, WR_VD|RD_VS,       0, MSA}, /* 2RF minor opcode 0x192, df in bit 16 */
+{"ftrunc_u.d", "+d,+e",  0x7b25001e, 0xffff003f, WR_VD|RD_VS,       0, MSA},
+{"ctcmsa",  "+h,d",     0x783e0019, 0xffff003f, COD,             RD_d, MSA},
+{"cfcmsa",  "+i,+g",    0x787e0019, 0xffff003f, COD,                0, MSA},
+{"move.v",  "+d,+e",    0x78be0019, 0xffff003f, WR_VD|RD_VS,        0, MSA},
+
 {"pref",    "k,o(b)",   0xcc000000, 0xfc000000, RD_b,           	0,		I4|I32|G3	},
 {"prefx",   "h,t(b)",	0x4c00000f, 0xfc0007ff, RD_b|RD_t,		0,		I4|I33	},
 {"nop",     "",         0x00000000, 0xffffffff, 0,              	INSN2_ALIAS,	I1      }, /* sll */
@@ -2843,6 +3416,13 @@ static const char * const mips_fpr_names_64[32] =
   "fs0",  "fs1",  "fs2",  "fs3",  "fs4",  "fs5",  "fs6",  "fs7"
 };
 
+static const char * const mips_wr_names[32] = { /* "w0".."w31": names printed for the "+d", "+e" and "+f" operands, indexed by the extracted register field */
+  "w0",  "w1",  "w2",  "w3",  "w4",  "w5",  "w6",  "w7",
+  "w8",  "w9",  "w10", "w11", "w12", "w13", "w14", "w15",
+  "w16", "w17", "w18", "w19", "w20", "w21", "w22", "w23",
+  "w24", "w25", "w26", "w27", "w28", "w29", "w30", "w31"
+};
+
 static const char * const mips_cp0_names_numeric[32] =
 {
   "$0",   "$1",   "$2",   "$3",   "$4",   "$5",   "$6",   "$7",
@@ -2895,6 +3475,10 @@ static const struct mips_cp0sel_name mips_cp0sel_names_mips3264[] =
   { 16, 1, "c0_config1"		},
   { 16, 2, "c0_config2"		},
   { 16, 3, "c0_config3"		},
+  { 16, 4, "c0_config4"         },
+  { 16, 5, "c0_config5"         },
+  { 16, 6, "c0_config6"         },
+  { 16, 7, "c0_config7"         },
   { 18, 1, "c0_watchlo,1"	},
   { 18, 2, "c0_watchlo,2"	},
   { 18, 3, "c0_watchlo,3"	},
@@ -2962,6 +3546,10 @@ static const struct mips_cp0sel_name mips_cp0sel_names_mips3264r2[] =
   { 16, 1, "c0_config1"		},
   { 16, 2, "c0_config2"		},
   { 16, 3, "c0_config3"		},
+  { 16, 4, "c0_config4"         },
+  { 16, 5, "c0_config5"         },
+  { 16, 6, "c0_config6"         },
+  { 16, 7, "c0_config7"         },
   { 18, 1, "c0_watchlo,1"	},
   { 18, 2, "c0_watchlo,2"	},
   { 18, 3, "c0_watchlo,3"	},
@@ -3061,6 +3649,20 @@ static const char * const mips_hwr_names_mips3264r2[32] =
   "$24",  "$25",  "$26",  "$27",  "$28",  "$29",  "$30",  "$31"
 };
 
+static const char * const mips_msa_control_names_numeric[32] = { /* plain "$N" names for 32 control registers; NOTE(review): the "+g"/"+h" operand printers in this patch index the mips3264r2 table instead - confirm this table is referenced elsewhere */
+  "$0",   "$1",   "$2",   "$3",   "$4",   "$5",   "$6",   "$7",
+  "$8",   "$9",   "$10",  "$11",  "$12",  "$13",  "$14",  "$15",
+  "$16",  "$17",  "$18",  "$19",  "$20",  "$21",  "$22",  "$23",
+  "$24",  "$25",  "$26",  "$27",  "$28",  "$29",  "$30",  "$31"
+};
+
+static const char * const mips_msa_control_names_mips3264r2[32] = { /* names printed for the "+g" and "+h" operands: index 0 is "MSAIR", index 1 is "MSACSR", every other index keeps its numeric "$N" name */
+  "MSAIR", "MSACSR", "$2", "$3",  "$4",   "$5",   "$6",   "$7",
+  "$8",   "$9",   "$10",  "$11",  "$12",  "$13",  "$14",  "$15",
+  "$16",  "$17",  "$18",  "$19",  "$20",  "$21",  "$22",  "$23",
+  "$24",  "$25",  "$26",  "$27",  "$28",  "$29",  "$30",  "$31"
+};
+
 struct mips_abi_choice
 {
   const char *name;
@@ -3178,7 +3780,7 @@ static const struct mips_arch_choice mips_arch_choices[] =
 
   { "mips32r2",	1, bfd_mach_mipsisa32r2, CPU_MIPS32R2,
     (ISA_MIPS32R2 | INSN_MIPS16 | INSN_SMARTMIPS | INSN_DSP | INSN_DSPR2
-     | INSN_MIPS3D | INSN_MT),
+     | INSN_MIPS3D | INSN_MT | INSN_MSA),
     mips_cp0_names_mips3264r2,
     mips_cp0sel_names_mips3264r2, ARRAY_SIZE (mips_cp0sel_names_mips3264r2),
     mips_hwr_names_mips3264r2 },
@@ -3532,6 +4134,89 @@ print_insn_args (const char *d,
 				     (l >> OP_SH_UDI4) & OP_MASK_UDI4);
 	      break;
 
+        case '5': /* 5-bit signed immediate in bit 16 */
+            delta = ((l >> OP_SH_RT) & OP_MASK_RT);
+            if (delta & 0x10) { /* test sign bit */
+                delta |= ~OP_MASK_RT;
+            }
+            (*info->fprintf_func) (info->stream, "%d", delta);
+            break;
+
+        case '6':
+            (*info->fprintf_func) (info->stream, "0x%lx",
+                    (l >> OP_SH_2BIT) & OP_MASK_2BIT);
+            break;
+
+        case '7':
+            (*info->fprintf_func) (info->stream, "0x%lx",
+                    (l >> OP_SH_3BIT) & OP_MASK_3BIT);
+            break;
+
+        case '8':
+            (*info->fprintf_func) (info->stream, "0x%lx",
+                    (l >> OP_SH_4BIT) & OP_MASK_4BIT);
+            break;
+
+        case '9':
+            (*info->fprintf_func) (info->stream, "0x%lx",
+                    (l >> OP_SH_5BIT) & OP_MASK_5BIT);
+            break;
+
+        case ':':
+            (*info->fprintf_func) (info->stream, "0x%lx",
+                    (l >> OP_SH_1BIT) & OP_MASK_1BIT);
+            break;
+
+        case '!': /* 10-bit pc-relative target in bit 11 */
+            delta = ((l >> OP_SH_10BIT) & OP_MASK_10BIT);
+            if (delta & 0x200) { /* test sign bit */
+                delta |= ~OP_MASK_10BIT;
+            }
+            info->target = (delta << 2) + pc + INSNLEN;
+            (*info->print_address_func) (info->target, info);
+            break;
+
+        case '~':
+            (*info->fprintf_func) (info->stream, "0");
+            break;
+
+        case '@':
+            (*info->fprintf_func) (info->stream, "0x%lx",
+                    ((l >> OP_SH_1_TO_4) & OP_MASK_1_TO_4)+1);
+            break;
+
+        case '^': /* 10-bit signed immediate << 0 in bit 16 */
+            delta = ((l >> OP_SH_IMM10) & OP_MASK_IMM10);
+            if (delta & 0x200) { /* test sign bit */
+                delta |= ~OP_MASK_IMM10;
+            }
+            (*info->fprintf_func) (info->stream, "%d", delta);
+            break;
+
+        case '#': /* 10-bit signed immediate << 1 in bit 16 */
+            delta = ((l >> OP_SH_IMM10) & OP_MASK_IMM10);
+            if (delta & 0x200) { /* test sign bit */
+                delta |= ~OP_MASK_IMM10;
+            }
+            (*info->fprintf_func) (info->stream, "%d", delta << 1);
+            break;
+
+        case '$': /* 10-bit signed immediate << 2 in bit 16 */
+            delta = ((l >> OP_SH_IMM10) & OP_MASK_IMM10);
+            if (delta & 0x200) { /* test sign bit */
+                delta |= ~OP_MASK_IMM10;
+            }
+            (*info->fprintf_func) (info->stream, "%d", delta << 2);
+            break;
+
+        case '%': /* 10-bit signed immediate << 3 in bit 16 */
+            delta = ((l >> OP_SH_IMM10) & OP_MASK_IMM10);
+            if (delta & 0x200) { /* test sign bit */
+                delta |= ~OP_MASK_IMM10;
+            }
+            (*info->fprintf_func) (info->stream, "%d", delta << 3);
+            break;
+
 	    case 'C':
 	    case 'H':
 	      msbd = (l >> OP_SH_EXTMSBD) & OP_MASK_EXTMSBD;
@@ -3603,6 +4288,38 @@ print_insn_args (const char *d,
 		break;
 	      }
 
+        case 'd':
+            (*info->fprintf_func) (info->stream, "%s",
+                    mips_wr_names[(l >> OP_SH_FD) & OP_MASK_FD]);
+            break;
+
+        case 'e':
+            (*info->fprintf_func) (info->stream, "%s",
+                    mips_wr_names[(l >> OP_SH_FS) & OP_MASK_FS]);
+            break;
+
+        case 'f':
+            (*info->fprintf_func) (info->stream, "%s",
+                    mips_wr_names[(l >> OP_SH_FT) & OP_MASK_FT]);
+            break;
+
+        case 'g':
+            (*info->fprintf_func) (info->stream, "%s",
+                    mips_msa_control_names_mips3264r2[(l >> OP_SH_MSACR11)
+                                                      & OP_MASK_MSACR11]);
+            break;
+
+        case 'h':
+            (*info->fprintf_func) (info->stream, "%s",
+                    mips_msa_control_names_mips3264r2[(l >> OP_SH_MSACR6)
+                                                      & OP_MASK_MSACR6]);
+            break;
+
+        case 'i':
+            (*info->fprintf_func) (info->stream, "%s",
+                    mips_gpr_names[(l >> OP_SH_GPR) & OP_MASK_GPR]);
+            break;
+
 	    default:
 	      /* xgettext:c-format */
 	      (*info->fprintf_func) (info->stream,
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

* [Qemu-devel] [PATCH 20/20] target-mips: add MSA support to mips32r5-generic
  2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
                   ` (18 preceding siblings ...)
  2014-07-14  9:56 ` [Qemu-devel] [PATCH 19/20] disas/mips.c: disassemble MSA instructions Yongbok Kim
@ 2014-07-14  9:56 ` Yongbok Kim
  19 siblings, 0 replies; 35+ messages in thread
From: Yongbok Kim @ 2014-07-14  9:56 UTC (permalink / raw)
  To: qemu-devel; +Cc: yongbok.kim, cristian.cuna, leon.alrae, aurelien

Add MSA support to the mips32r5-generic core definition: set the MSAP bit in
CP0_Config3 and add ASE_MSA to insn_flags.

Signed-off-by: Yongbok Kim <yongbok.kim@imgtec.com>
---
 target-mips/translate_init.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/target-mips/translate_init.c b/target-mips/translate_init.c
index 9e0f67b..034a3f8 100644
--- a/target-mips/translate_init.c
+++ b/target-mips/translate_init.c
@@ -355,7 +355,7 @@ static const mips_def_t mips_defs[] =
                        (0 << CP0C1_DS) | (3 << CP0C1_DL) | (1 << CP0C1_DA) |
                        (1 << CP0C1_CA),
         .CP0_Config2 = MIPS_CONFIG2,
-        .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M),
+        .CP0_Config3 = MIPS_CONFIG3 | (1U << CP0C3_M) | (1 << CP0C3_MSAP),
         .CP0_Config4 = MIPS_CONFIG4 | (1U << CP0C4_M),
         .CP0_Config4_rw_bitmask = 0,
         .CP0_Config5 = MIPS_CONFIG5 | (1 << CP0C5_UFR),
@@ -373,7 +373,7 @@ static const mips_def_t mips_defs[] =
                     (0x93 << FCR0_PRID),
         .SEGBITS = 32,
         .PABITS = 32,
-        .insn_flags = CPU_MIPS32R5 | ASE_MIPS16 | ASE_DSP | ASE_DSPR2,
+        .insn_flags = CPU_MIPS32R5 | ASE_MIPS16 | ASE_DSP | ASE_DSPR2 | ASE_MSA,
         .mmu_type = MMU_TYPE_R4000,
     },
 #if defined(TARGET_MIPS64)
-- 
1.7.4

^ permalink raw reply related	[flat|nested] 35+ messages in thread

end of thread, other threads:[~2014-10-28 23:54 UTC | newest]

Thread overview: 35+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-07-14  9:55 [Qemu-devel] [PATCH 00/20] target-mips: add MSA module Yongbok Kim
2014-07-14  9:55 ` [Qemu-devel] [PATCH 01/20] target-mips: add MSA defines and data structure Yongbok Kim
2014-10-22 11:35   ` James Hogan
2014-10-24  9:35     ` Yongbok Kim
2014-10-24 12:57       ` Leon Alrae
2014-10-22 13:15   ` James Hogan
2014-07-14  9:55 ` [Qemu-devel] [PATCH 02/20] target-mips: add MSA exceptions Yongbok Kim
2014-07-14  9:55 ` [Qemu-devel] [PATCH 03/20] target-mips: move common funcs to cpu.h Yongbok Kim
2014-10-10  9:22   ` Leon Alrae
2014-07-14  9:55 ` [Qemu-devel] [PATCH 04/20] target-mips: add 8, 16, 32, 64 bits load and store Yongbok Kim
2014-10-10  9:26   ` Leon Alrae
2014-07-14  9:55 ` [Qemu-devel] [PATCH 05/20] target-mips: stop translation after ctc1 Yongbok Kim
2014-07-14  9:55 ` [Qemu-devel] [PATCH 06/20] target-mips: add MSA opcode enum Yongbok Kim
2014-10-10  9:26   ` Leon Alrae
2014-10-22 12:18   ` James Hogan
2014-07-14  9:55 ` [Qemu-devel] [PATCH 07/20] target-mips: add msa_reset(), global msa register Yongbok Kim
2014-10-22 13:21   ` James Hogan
2014-07-14  9:55 ` [Qemu-devel] [PATCH 08/20] target-mips: add msa_helper.c Yongbok Kim
2014-10-10  9:27   ` Leon Alrae
2014-10-22 15:29   ` James Hogan
2014-07-14  9:55 ` [Qemu-devel] [PATCH 09/20] target-mips: add MSA branch instructions Yongbok Kim
2014-10-10 14:13   ` Leon Alrae
2014-10-28 23:05   ` James Hogan
2014-07-14  9:55 ` [Qemu-devel] [PATCH 10/20] target-mips: add MSA I8 format instructions Yongbok Kim
2014-10-28 23:54   ` James Hogan
2014-07-14  9:55 ` [Qemu-devel] [PATCH 11/20] target-mips: add MSA I5 " Yongbok Kim
2014-07-14  9:55 ` [Qemu-devel] [PATCH 12/20] target-mips: add MSA BIT " Yongbok Kim
2014-07-14  9:55 ` [Qemu-devel] [PATCH 13/20] target-mips: add MSA 3R " Yongbok Kim
2014-07-14  9:55 ` [Qemu-devel] [PATCH 14/20] target-mips: add MSA ELM " Yongbok Kim
2014-07-14  9:55 ` [Qemu-devel] [PATCH 15/20] target-mips: add MSA 3RF " Yongbok Kim
2014-07-14  9:55 ` [Qemu-devel] [PATCH 16/20] target-mips: add MSA VEC/2R " Yongbok Kim
2014-07-14  9:56 ` [Qemu-devel] [PATCH 17/20] target-mips: add MSA 2RF " Yongbok Kim
2014-07-14  9:56 ` [Qemu-devel] [PATCH 18/20] target-mips: add MSA MI10 " Yongbok Kim
2014-07-14  9:56 ` [Qemu-devel] [PATCH 19/20] disas/mips.c: disassemble MSA instructions Yongbok Kim
2014-07-14  9:56 ` [Qemu-devel] [PATCH 20/20] target-mips: add MSA support to mips32r5-generic Yongbok Kim

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).