[Qemu-devel] [PATCH 1/8] target/ppc: Optimize emulation of lvsl and lvsr instructions

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Stefan Brankovic <stefan.brankovic@rt-rk.com>
To: qemu-devel@nongnu.org
Cc: david@gibson.dropbear.id.au
Subject: [Qemu-devel] [PATCH 1/8] target/ppc: Optimize emulation of lvsl and lvsr instructions
Date: Thu,  6 Jun 2019 12:15:23 +0200	[thread overview]
Message-ID: <1559816130-17113-2-git-send-email-stefan.brankovic@rt-rk.com> (raw)
In-Reply-To: <1559816130-17113-1-git-send-email-stefan.brankovic@rt-rk.com>

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=y, Size: 7561 bytes --]

Adding simple macro that is calling tcg implementation of appropriate
instruction if altivec support is active.

Optimization of altivec instruction lvsl (Load Vector for Shift Left).
Place bytes sh:sh+15 of value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F
in destination register. Sh is calculated by adding 2 source registers and
getting bits 60-63 of result.

First we place bits [28-31] of EA to variable sh. After that
we create bytes sh:(sh+7) of X(from description) in for loop
(by incrementing sh in each iteration and placing it in
appropriate byte of variable result) and save them in higher
doubleword element of vD. We repeat this once again for lower
doubleword element of vD by creating bytes (sh+8):(sh+15) in
a for loop and saving result.

Optimization of altivec instruction lvsr (Load Vector for Shift Right).
Place bytes 16-sh:31-sh of value 0x00 || 0x01 || 0x02 || ... || 0x1E ||
0x1F in destination register. Sh is calculated by adding 2 source
registers and getting bits 60-63 of result.

First we place bits [28-31] of EA to variable sh. After that
we create bytes (16-sh):(23-sh) of X(from description) in for loop
(by incrementing sh in each iteration and placing it in
appropriate byte of variable result) and save them in higher
doubleword element of vD. We repeat this once again for lower
doubleword element of vD by creating bytes (24-sh):(32-sh) in
a for loop and saving result.

Signed-off-by: Stefan Brankovic <stefan.brankovic@rt-rk.com>
---
 target/ppc/translate/vmx-impl.inc.c | 143 ++++++++++++++++++++++++++++--------
 1 file changed, 111 insertions(+), 32 deletions(-)

diff --git a/target/ppc/translate/vmx-impl.inc.c b/target/ppc/translate/vmx-impl.inc.c
index bd3ff40..140bb05 100644
--- a/target/ppc/translate/vmx-impl.inc.c
+++ b/target/ppc/translate/vmx-impl.inc.c
@@ -142,38 +142,6 @@ GEN_VR_STVE(bx, 0x07, 0x04, 1);
 GEN_VR_STVE(hx, 0x07, 0x05, 2);
 GEN_VR_STVE(wx, 0x07, 0x06, 4);
 
-static void gen_lvsl(DisasContext *ctx)
-{
-    TCGv_ptr rd;
-    TCGv EA;
-    if (unlikely(!ctx->altivec_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VPU);
-        return;
-    }
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
-    rd = gen_avr_ptr(rD(ctx->opcode));
-    gen_helper_lvsl(rd, EA);
-    tcg_temp_free(EA);
-    tcg_temp_free_ptr(rd);
-}
-
-static void gen_lvsr(DisasContext *ctx)
-{
-    TCGv_ptr rd;
-    TCGv EA;
-    if (unlikely(!ctx->altivec_enabled)) {
-        gen_exception(ctx, POWERPC_EXCP_VPU);
-        return;
-    }
-    EA = tcg_temp_new();
-    gen_addr_reg_index(ctx, EA);
-    rd = gen_avr_ptr(rD(ctx->opcode));
-    gen_helper_lvsr(rd, EA);
-    tcg_temp_free(EA);
-    tcg_temp_free_ptr(rd);
-}
-
 static void gen_mfvscr(DisasContext *ctx)
 {
     TCGv_i32 t;
@@ -316,6 +284,16 @@ static void glue(gen_, name)(DisasContext *ctx)                         \
     tcg_temp_free_ptr(rd);                                              \
 }
 
+#define GEN_VXFORM_TRANS(name, opc2, opc3)                              \
+static void glue(gen_, name)(DisasContext *ctx)                         \
+{                                                                       \
+    if (unlikely(!ctx->altivec_enabled)) {                              \
+        gen_exception(ctx, POWERPC_EXCP_VPU);                           \
+        return;                                                         \
+    }                                                                   \
+    trans_##name(ctx);                                                  \
+}
+
 #define GEN_VXFORM_ENV(name, opc2, opc3)                                \
 static void glue(gen_, name)(DisasContext *ctx)                         \
 {                                                                       \
@@ -515,6 +493,105 @@ static void gen_vmrgow(DisasContext *ctx)
     tcg_temp_free_i64(avr);
 }
 
+/*
+ * lvsl VRT,RA,RB - Load Vector for Shift Left
+ *
+ * Let the EA be the sum (rA|0)+(rB). Let sh=EA[28–31].
+ * Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F.
+ * Bytes sh:sh+15 of X are placed into vD.
+ */
+static void trans_lvsl(DisasContext *ctx)
+{
+    int VT = rD(ctx->opcode);
+    TCGv_i64 result = tcg_temp_new_i64();
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    TCGv_i64 sh = tcg_temp_new_i64();
+    TCGv_i64 EA = tcg_temp_new();
+    int i;
+
+    /* Get sh(from description) by anding EA with 0xf. */
+    gen_addr_reg_index(ctx, EA);
+    tcg_gen_andi_i64(sh, EA, 0xfULL);
+    /*
+     * Create bytes sh:sh+7 of X(from description) and place them in
+     * higher doubleword of vD.
+     */
+    tcg_gen_addi_i64(result, sh, 7);
+    for (i = 7; i >= 1; i--) {
+        tcg_gen_shli_i64(tmp, sh, i * 8);
+        tcg_gen_or_i64(result, result, tmp);
+        tcg_gen_addi_i64(sh, sh, 1);
+    }
+    set_avr64(VT, result, true);
+    /*
+     * Create bytes sh+8:sh+15 of X(from description) and place them in
+     * lower doubleword of vD.
+     */
+    tcg_gen_addi_i64(result, sh, 8);
+    for (i = 7; i >= 1; i--) {
+        tcg_gen_addi_i64(sh, sh, 1);
+        tcg_gen_shli_i64(tmp, sh, i * 8);
+        tcg_gen_or_i64(result, result, tmp);
+    }
+    set_avr64(VT, result, false);
+
+    tcg_temp_free_i64(result);
+    tcg_temp_free_i64(tmp);
+    tcg_temp_free_i64(sh);
+    tcg_temp_free(EA);
+}
+
+/*
+ * lvsr VRT,RA,RB - Load Vector for Shift Right
+ *
+ * Let the EA be the sum (rA|0)+(rB). Let sh=EA[28–31].
+ * Let X be the 32-byte value 0x00 || 0x01 || 0x02 || ... || 0x1E || 0x1F.
+ * Bytes (16-sh):(31-sh) of X are placed into vD.
+ */
+static void trans_lvsr(DisasContext *ctx)
+{
+    int VT = rD(ctx->opcode);
+    TCGv_i64 result = tcg_temp_new_i64();
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    TCGv_i64 sh = tcg_temp_new_i64();
+    TCGv_i64 EA = tcg_temp_new();
+    int i;
+
+    /* Get sh(from description) by anding EA with 0xf. */
+    gen_addr_reg_index(ctx, EA);
+    tcg_gen_andi_i64(sh, EA, 0xfULL);
+    /* Make (16-sh) and save it in sh. */
+    tcg_gen_subi_i64(sh, sh, 0x10ULL);
+    tcg_gen_neg_i64(sh, sh);
+    /*
+     * Create bytes (16-sh):(23-sh) of X(from description) and place them in
+     * higher doubleword of vD.
+     */
+    tcg_gen_addi_i64(result, sh, 7);
+    for (i = 7; i >= 1; i--) {
+        tcg_gen_shli_i64(tmp, sh, i * 8);
+        tcg_gen_or_i64(result, result, tmp);
+        tcg_gen_addi_i64(sh, sh, 1);
+    }
+    set_avr64(VT, result, true);
+    /*
+     * Create bytes (24-sh):(32-sh) of X(from description) and place them in
+     * lower doubleword of vD.
+     */
+    tcg_gen_addi_i64(result, sh, 8);
+    for (i = 7; i >= 1; i--) {
+        tcg_gen_addi_i64(sh, sh, 1);
+        tcg_gen_shli_i64(tmp, sh, i * 8);
+        tcg_gen_or_i64(result, result, tmp);
+    }
+    set_avr64(VT, result, false);
+
+    tcg_temp_free_i64(result);
+    tcg_temp_free_i64(tmp);
+    tcg_temp_free_i64(sh);
+    tcg_temp_free(EA);
+}
+
 GEN_VXFORM(vmuloub, 4, 0);
 GEN_VXFORM(vmulouh, 4, 1);
 GEN_VXFORM(vmulouw, 4, 2);
@@ -657,6 +734,8 @@ GEN_VXFORM_DUAL(vmrgow, PPC_NONE, PPC2_ALTIVEC_207,
 GEN_VXFORM_HETRO(vextubrx, 6, 28)
 GEN_VXFORM_HETRO(vextuhrx, 6, 29)
 GEN_VXFORM_HETRO(vextuwrx, 6, 30)
+GEN_VXFORM_TRANS(lvsl, 6, 31)
+GEN_VXFORM_TRANS(lvsr, 6, 32)
 GEN_VXFORM_DUAL(vmrgew, PPC_NONE, PPC2_ALTIVEC_207, \
                 vextuwrx, PPC_NONE, PPC2_ISA300)
 
-- 
2.7.4

next prev parent reply	other threads:[~2019-06-06 10:18 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-06 10:15 [Qemu-devel] [PATCH 0/8] Optimize emulation of ten Altivec instructions: lvsl, Stefan Brankovic
2019-06-06 10:15 ` Stefan Brankovic [this message]
2019-06-06 16:46   ` [Qemu-devel] [PATCH 1/8] target/ppc: Optimize emulation of lvsl and lvsr instructions Richard Henderson
2019-06-17 11:31     ` Stefan Brankovic
2019-06-06 10:15 ` [Qemu-devel] [PATCH 2/8] target/ppc: Optimize emulation of vsl and vsr instructions Stefan Brankovic
2019-06-06 17:03   ` Richard Henderson
2019-06-17 11:36     ` Stefan Brankovic
2019-06-06 10:15 ` [Qemu-devel] [PATCH 3/8] target/ppc: Optimize emulation of vpkpx instruction Stefan Brankovic
2019-06-06 10:15 ` [Qemu-devel] [PATCH 4/8] target/ppc: Optimize emulation of vgbbd instruction Stefan Brankovic
2019-06-06 18:19   ` Richard Henderson
2019-06-17 11:58     ` Stefan Brankovic
2019-06-06 10:15 ` [Qemu-devel] [PATCH 5/8] target/ppc: Optimize emulation of vclzd instruction Stefan Brankovic
2019-06-06 18:26   ` Richard Henderson
2019-06-06 10:15 ` [Qemu-devel] [PATCH 6/8] target/ppc: Optimize emulation of vclzw instruction Stefan Brankovic
2019-06-06 18:34   ` Richard Henderson
2019-06-17 11:50     ` Stefan Brankovic
2019-06-06 10:15 ` [Qemu-devel] [PATCH 7/8] target/ppc: Optimize emulation of vclzh and vclzb instructions Stefan Brankovic
2019-06-06 20:38   ` Richard Henderson
2019-06-17 11:42     ` Stefan Brankovic
2019-06-06 10:15 ` [Qemu-devel] [PATCH 8/8] target/ppc: Refactor emulation of vmrgew and vmrgow instructions Stefan Brankovic
2019-06-06 20:43   ` Richard Henderson
2019-06-17 11:43     ` Stefan Brankovic
2019-06-06 17:13 ` [Qemu-devel] [PATCH 0/8] Optimize emulation of ten Altivec instructions: lvsl, Richard Henderson
2019-06-12  7:31   ` [Qemu-devel] ?==?utf-8?q? ?==?utf-8?q? [PATCH 0/8] Optimize emulation of ten Altivec instructions:?==?utf-8?q? lvsl, Stefan Brankovic
2019-06-17 11:32   ` [Qemu-devel] [PATCH 0/8] Optimize emulation of ten Altivec instructions: lvsl, Stefan Brankovic
2019-06-07  3:51 ` Howard Spoelstra
  -- strict thread matches above, loose matches on Subject: below --
2019-06-19 11:03 [Qemu-devel] [PATCH 0/8] target/ppc: Optimize emulation of some Altivec instructions Stefan Brankovic
2019-06-19 11:03 ` [Qemu-devel] [PATCH 1/8] target/ppc: Optimize emulation of lvsl and lvsr instructions Stefan Brankovic
2019-06-26 15:28   ` Richard Henderson
2019-06-26 15:48   ` Richard Henderson

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:bd3ff40 dfblob:140bb05 )
 OR (
bs:"[Qemu-devel] [PATCH 1/8] target/ppc: Optimize emulation of lvsl and lvsr instructions" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1559816130-17113-2-git-send-email-stefan.brankovic@rt-rk.com \
    --to=stefan.brankovic@rt-rk.com \
    --cc=david@gibson.dropbear.id.au \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).