qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH v2 0/4] target/mips: Optimize support for certain MSA instructions
@ 2019-03-01 13:08 Mateja Marjanovic
  2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 1/4] target/mips: Optimize support for MSA instructions ILVEV.<B|H|W|D> Mateja Marjanovic
                   ` (3 more replies)
  0 siblings, 4 replies; 5+ messages in thread
From: Mateja Marjanovic @ 2019-03-01 13:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, amarkovic, arikalo

From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>

This series optimizes the support for certain MSA instructions.

v2:
  - Fixed indentation in two places
  - Fixed bugs for the cases when the destination register and
    one of the source registers are the same

Mateja Marjanovic (4):
  target/mips: Optimize support for MSA instructions ILVEV.<B|H|W|D>
  target/mips: Optimize support for MSA instructions ILVOD.<B|H|W|D>
  target/mips: Optimize support for MSA instructions ILVL.<B|H|W|D>
  target/mips: Optimize support for MSA instructions ILVR.<B|H|W|D>

 target/mips/msa_helper.c | 236 ++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 205 insertions(+), 31 deletions(-)

-- 
2.7.4

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH v2 1/4] target/mips: Optimize support for MSA instructions ILVEV.<B|H|W|D>
  2019-03-01 13:08 [Qemu-devel] [PATCH v2 0/4] target/mips: Optimize support for certain MSA instructions Mateja Marjanovic
@ 2019-03-01 13:08 ` Mateja Marjanovic
  2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 2/4] target/mips: Optimize support for MSA instructions ILVOD.<B|H|W|D> Mateja Marjanovic
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 5+ messages in thread
From: Mateja Marjanovic @ 2019-03-01 13:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, amarkovic, arikalo

From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>

Optimize support for MSA instructions ILVEV.B, ILVEV.H, ILVEV.W, and
ILVEV.D.

Optimization is done by eliminating loops and explicitly assigning
the desired values to individual data elements. Performance measurement
is done by executing the instructions a large number of times on a
computer with an Intel Core i7-3770 CPU @ 3.40GHz×8.

Measured time before optimization:
  ILVEV.B:  119.02 ms
  ILVEV.H:   94.16 ms
  ILVEV.W:  120.97 ms
  ILVEV.D:   42.99 ms

Measured time after optimization:
  ILVEV.B:   61.81 ms
  ILVEV.H:   42.78 ms
  ILVEV.W:   39.47 ms
  ILVEV.D:   39.11 ms

Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
---
 target/mips/msa_helper.c | 60 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 52 insertions(+), 8 deletions(-)

diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
index c74e3cd..49332e6 100644
--- a/target/mips/msa_helper.c
+++ b/target/mips/msa_helper.c
@@ -1198,14 +1198,6 @@ MSA_FN_DF(ilvl_df)
 MSA_FN_DF(ilvr_df)
 #undef MSA_DO
 
-#define MSA_DO(DF)                      \
-    do {                                \
-        pwx->DF[2*i]   = pwt->DF[2*i];  \
-        pwx->DF[2*i+1] = pws->DF[2*i];  \
-    } while (0)
-MSA_FN_DF(ilvev_df)
-#undef MSA_DO
-
 #define MSA_DO(DF)                          \
     do {                                    \
         pwx->DF[2*i]   = pwt->DF[2*i+1];    \
@@ -1230,6 +1222,58 @@ MSA_FN_DF(vshf_df)
 #undef MSA_LOOP_COND
 #undef MSA_FN_DF
 
+
+void helper_msa_ilvev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                         uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[15] = pws->b[14];
+        pwd->b[14] = pwt->b[14];
+        pwd->b[13] = pws->b[12];
+        pwd->b[12] = pwt->b[12];
+        pwd->b[11] = pws->b[10];
+        pwd->b[10] = pwt->b[10];
+        pwd->b[9]  = pws->b[8];
+        pwd->b[8]  = pwt->b[8];
+        pwd->b[7]  = pws->b[6];
+        pwd->b[6]  = pwt->b[6];
+        pwd->b[5]  = pws->b[4];
+        pwd->b[4]  = pwt->b[4];
+        pwd->b[3]  = pws->b[2];
+        pwd->b[2]  = pwt->b[2];
+        pwd->b[1]  = pws->b[0];
+        pwd->b[0]  = pwt->b[0];
+        break;
+    case DF_HALF:
+        pwd->h[7] = pws->h[6];
+        pwd->h[6] = pwt->h[6];
+        pwd->h[5] = pws->h[4];
+        pwd->h[4] = pwt->h[4];
+        pwd->h[3] = pws->h[2];
+        pwd->h[2] = pwt->h[2];
+        pwd->h[1] = pws->h[0];
+        pwd->h[0] = pwt->h[0];
+        break;
+    case DF_WORD:
+        pwd->w[3] = pws->w[2];
+        pwd->w[2] = pwt->w[2];
+        pwd->w[1] = pws->w[0];
+        pwd->w[0] = pwt->w[0];
+        break;
+    case DF_DOUBLE:
+        pwd->d[1] = pws->d[0];
+        pwd->d[0] = pwt->d[0];
+        break;
+    default:
+        assert(0);
+    }
+}
+
 void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t n)
 {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH v2 2/4] target/mips: Optimize support for MSA instructions ILVOD.<B|H|W|D>
  2019-03-01 13:08 [Qemu-devel] [PATCH v2 0/4] target/mips: Optimize support for certain MSA instructions Mateja Marjanovic
  2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 1/4] target/mips: Optimize support for MSA instructions ILVEV.<B|H|W|D> Mateja Marjanovic
@ 2019-03-01 13:08 ` Mateja Marjanovic
  2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 3/4] target/mips: Optimize support for MSA instructions ILVL.<B|H|W|D> Mateja Marjanovic
  2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 4/4] target/mips: Optimize support for MSA instructions ILVR.<B|H|W|D> Mateja Marjanovic
  3 siblings, 0 replies; 5+ messages in thread
From: Mateja Marjanovic @ 2019-03-01 13:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, amarkovic, arikalo

From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>

Optimize support for MSA instructions ILVOD.B, ILVOD.H, ILVOD.W, and
ILVOD.D.

Optimization is done by eliminating loops and explicitly assigning
the desired values to individual data elements. Performance measurement
is done by executing the instructions a large number of times on a
computer with an Intel Core i7-3770 CPU @ 3.40GHz×8.

Measured time before optimization:
  ILVOD.B:  118.42 ms
  ILVOD.H:   93.45 ms
  ILVOD.W:  119.89 ms
  ILVOD.D:   46.00 ms

Measured time after optimization:
  ILVOD.B:   60.08 ms
  ILVOD.H:   41.52 ms
  ILVOD.W:   36.44 ms
  ILVOD.D:   36.40 ms

Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
---
 target/mips/msa_helper.c | 58 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 51 insertions(+), 7 deletions(-)

diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
index 49332e6..2426338 100644
--- a/target/mips/msa_helper.c
+++ b/target/mips/msa_helper.c
@@ -1198,13 +1198,6 @@ MSA_FN_DF(ilvl_df)
 MSA_FN_DF(ilvr_df)
 #undef MSA_DO
 
-#define MSA_DO(DF)                          \
-    do {                                    \
-        pwx->DF[2*i]   = pwt->DF[2*i+1];    \
-        pwx->DF[2*i+1] = pws->DF[2*i+1];    \
-    } while (0)
-MSA_FN_DF(ilvod_df)
-#undef MSA_DO
 #undef MSA_LOOP_COND
 
 #define MSA_LOOP_COND(DF) \
@@ -1274,6 +1267,57 @@ void helper_msa_ilvev_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+void helper_msa_ilvod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                         uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[0]  = pwt->b[1];
+        pwd->b[1]  = pws->b[1];
+        pwd->b[2]  = pwt->b[3];
+        pwd->b[3]  = pws->b[3];
+        pwd->b[4]  = pwt->b[5];
+        pwd->b[5]  = pws->b[5];
+        pwd->b[6]  = pwt->b[7];
+        pwd->b[7]  = pws->b[7];
+        pwd->b[8]  = pwt->b[9];
+        pwd->b[9]  = pws->b[9];
+        pwd->b[10] = pwt->b[11];
+        pwd->b[11] = pws->b[11];
+        pwd->b[12] = pwt->b[13];
+        pwd->b[13] = pws->b[13];
+        pwd->b[14] = pwt->b[15];
+        pwd->b[15] = pws->b[15];
+        break;
+    case DF_HALF:
+        pwd->h[0] = pwt->h[1];
+        pwd->h[1] = pws->h[1];
+        pwd->h[2] = pwt->h[3];
+        pwd->h[3] = pws->h[3];
+        pwd->h[4] = pwt->h[5];
+        pwd->h[5] = pws->h[5];
+        pwd->h[6] = pwt->h[7];
+        pwd->h[7] = pws->h[7];
+        break;
+    case DF_WORD:
+        pwd->w[0] = pwt->w[1];
+        pwd->w[1] = pws->w[1];
+        pwd->w[2] = pwt->w[3];
+        pwd->w[3] = pws->w[3];
+        break;
+    case DF_DOUBLE:
+        pwd->d[0] = pwt->d[1];
+        pwd->d[1] = pws->d[1];
+        break;
+    default:
+        assert(0);
+    }
+}
+
 void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t n)
 {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH v2 3/4] target/mips: Optimize support for MSA instructions ILVL.<B|H|W|D>
  2019-03-01 13:08 [Qemu-devel] [PATCH v2 0/4] target/mips: Optimize support for certain MSA instructions Mateja Marjanovic
  2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 1/4] target/mips: Optimize support for MSA instructions ILVEV.<B|H|W|D> Mateja Marjanovic
  2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 2/4] target/mips: Optimize support for MSA instructions ILVOD.<B|H|W|D> Mateja Marjanovic
@ 2019-03-01 13:08 ` Mateja Marjanovic
  2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 4/4] target/mips: Optimize support for MSA instructions ILVR.<B|H|W|D> Mateja Marjanovic
  3 siblings, 0 replies; 5+ messages in thread
From: Mateja Marjanovic @ 2019-03-01 13:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, amarkovic, arikalo

From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>

Optimize support for MSA instructions ILVL.B, ILVL.H, ILVL.W, and
ILVL.D.

Optimization is done by eliminating loops and explicitly assigning
the desired values to individual data elements. Performance measurement
is done by executing the instructions a large number of times on a
computer with an Intel Core i7-3770 CPU @ 3.40GHz×8.

Measured time before optimization:
  ILVL.B:  114.31 ms
  ILVL.H:   92.74 ms
  ILVL.W:  123.96 ms
  ILVL.D:   39.45 ms

Measured time after optimization:
  ILVL.B:   60.18 ms
  ILVL.H:   40.38 ms
  ILVL.W:   38.98 ms
  ILVL.D:   36.48 ms

Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
---
 target/mips/msa_helper.c | 59 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 8 deletions(-)

diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
index 2426338..a91f5a6 100644
--- a/target/mips/msa_helper.c
+++ b/target/mips/msa_helper.c
@@ -1184,14 +1184,6 @@ MSA_FN_DF(pckod_df)
 
 #define MSA_DO(DF)                      \
     do {                                \
-        pwx->DF[2*i]   = L##DF(pwt, i); \
-        pwx->DF[2*i+1] = L##DF(pws, i); \
-    } while (0)
-MSA_FN_DF(ilvl_df)
-#undef MSA_DO
-
-#define MSA_DO(DF)                      \
-    do {                                \
         pwx->DF[2*i]   = R##DF(pwt, i); \
         pwx->DF[2*i+1] = R##DF(pws, i); \
     } while (0)
@@ -1318,6 +1310,57 @@ void helper_msa_ilvod_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+void helper_msa_ilvl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                        uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[0]  = pwt->b[8];
+        pwd->b[1]  = pws->b[8];
+        pwd->b[2]  = pwt->b[9];
+        pwd->b[3]  = pws->b[9];
+        pwd->b[4]  = pwt->b[10];
+        pwd->b[5]  = pws->b[10];
+        pwd->b[6]  = pwt->b[11];
+        pwd->b[7]  = pws->b[11];
+        pwd->b[8]  = pwt->b[12];
+        pwd->b[9]  = pws->b[12];
+        pwd->b[10] = pwt->b[13];
+        pwd->b[11] = pws->b[13];
+        pwd->b[12] = pwt->b[14];
+        pwd->b[13] = pws->b[14];
+        pwd->b[14] = pwt->b[15];
+        pwd->b[15] = pws->b[15];
+        break;
+    case DF_HALF:
+        pwd->h[0] = pwt->h[4];
+        pwd->h[1] = pws->h[4];
+        pwd->h[2] = pwt->h[5];
+        pwd->h[3] = pws->h[5];
+        pwd->h[4] = pwt->h[6];
+        pwd->h[5] = pws->h[6];
+        pwd->h[6] = pwt->h[7];
+        pwd->h[7] = pws->h[7];
+        break;
+    case DF_WORD:
+        pwd->w[0] = pwt->w[2];
+        pwd->w[1] = pws->w[2];
+        pwd->w[2] = pwt->w[3];
+        pwd->w[3] = pws->w[3];
+        break;
+    case DF_DOUBLE:
+        pwd->d[0] = pwt->d[1];
+        pwd->d[1] = pws->d[1];
+        break;
+    default:
+        assert(0);
+    }
+}
+
 void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t n)
 {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [Qemu-devel] [PATCH v2 4/4] target/mips: Optimize support for MSA instructions ILVR.<B|H|W|D>
  2019-03-01 13:08 [Qemu-devel] [PATCH v2 0/4] target/mips: Optimize support for certain MSA instructions Mateja Marjanovic
                   ` (2 preceding siblings ...)
  2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 3/4] target/mips: Optimize support for MSA instructions ILVL.<B|H|W|D> Mateja Marjanovic
@ 2019-03-01 13:08 ` Mateja Marjanovic
  3 siblings, 0 replies; 5+ messages in thread
From: Mateja Marjanovic @ 2019-03-01 13:08 UTC (permalink / raw)
  To: qemu-devel; +Cc: aurelien, amarkovic, arikalo

From: Mateja Marjanovic <Mateja.Marjanovic@rt-rk.com>

Optimize support for MSA instructions ILVR.B, ILVR.H, ILVR.W, and
ILVR.D.

Optimization is done by eliminating loops and explicitly assigning
the desired values to individual data elements. Performance measurement
is done by executing the instructions a large number of times on a
computer with an Intel Core i7-3770 CPU @ 3.40GHz×8.

Measured time before optimization:
  ILVR.B:  115.84 ms
  ILVR.H:   94.20 ms
  ILVR.W:  121.12 ms
  ILVR.D:   41.36 ms

Measured time after optimization:
  ILVR.B:   61.06 ms
  ILVR.H:   43.03 ms
  ILVR.W:   39.21 ms
  ILVR.D:   39.18 ms

Signed-off-by: Mateja Marjanovic <mateja.marjanovic@rt-rk.com>
---
 target/mips/msa_helper.c | 59 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 51 insertions(+), 8 deletions(-)

diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
index a91f5a6..4c7ec05 100644
--- a/target/mips/msa_helper.c
+++ b/target/mips/msa_helper.c
@@ -1182,14 +1182,6 @@ MSA_FN_DF(pckev_df)
 MSA_FN_DF(pckod_df)
 #undef MSA_DO
 
-#define MSA_DO(DF)                      \
-    do {                                \
-        pwx->DF[2*i]   = R##DF(pwt, i); \
-        pwx->DF[2*i+1] = R##DF(pws, i); \
-    } while (0)
-MSA_FN_DF(ilvr_df)
-#undef MSA_DO
-
 #undef MSA_LOOP_COND
 
 #define MSA_LOOP_COND(DF) \
@@ -1361,6 +1353,57 @@ void helper_msa_ilvl_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
     }
 }
 
+void helper_msa_ilvr_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
+                        uint32_t ws, uint32_t wt)
+{
+    wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
+    wr_t *pws = &(env->active_fpu.fpr[ws].wr);
+    wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
+
+    switch (df) {
+    case DF_BYTE:
+        pwd->b[15] = pws->b[7];
+        pwd->b[14] = pwt->b[7];
+        pwd->b[13] = pws->b[6];
+        pwd->b[12] = pwt->b[6];
+        pwd->b[11] = pws->b[5];
+        pwd->b[10] = pwt->b[5];
+        pwd->b[9]  = pws->b[4];
+        pwd->b[8]  = pwt->b[4];
+        pwd->b[7]  = pws->b[3];
+        pwd->b[6]  = pwt->b[3];
+        pwd->b[5]  = pws->b[2];
+        pwd->b[4]  = pwt->b[2];
+        pwd->b[3]  = pws->b[1];
+        pwd->b[2]  = pwt->b[1];
+        pwd->b[1]  = pws->b[0];
+        pwd->b[0]  = pwt->b[0];
+        break;
+    case DF_HALF:
+        pwd->h[7] = pws->h[3];
+        pwd->h[6] = pwt->h[3];
+        pwd->h[5] = pws->h[2];
+        pwd->h[4] = pwt->h[2];
+        pwd->h[3] = pws->h[1];
+        pwd->h[2] = pwt->h[1];
+        pwd->h[1] = pws->h[0];
+        pwd->h[0] = pwt->h[0];
+        break;
+    case DF_WORD:
+        pwd->w[3] = pws->w[1];
+        pwd->w[2] = pwt->w[1];
+        pwd->w[1] = pws->w[0];
+        pwd->w[0] = pwt->w[0];
+        break;
+    case DF_DOUBLE:
+        pwd->d[1] = pws->d[0];
+        pwd->d[0] = pwt->d[0];
+        break;
+    default:
+        assert(0);
+    }
+}
+
 void helper_msa_sldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
                         uint32_t ws, uint32_t n)
 {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2019-03-01 13:09 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-01 13:08 [Qemu-devel] [PATCH v2 0/4] target/mips: Optimize support for certain MSA instructions Mateja Marjanovic
2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 1/4] target/mips: Optimize support for MSA instructions ILVEV.<B|H|W|D> Mateja Marjanovic
2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 2/4] target/mips: Optimize support for MSA instructions ILVOD.<B|H|W|D> Mateja Marjanovic
2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 3/4] target/mips: Optimize support for MSA instructions ILVL.<B|H|W|D> Mateja Marjanovic
2019-03-01 13:08 ` [Qemu-devel] [PATCH v2 4/4] target/mips: Optimize support for MSA instructions ILVR.<B|H|W|D> Mateja Marjanovic

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).