qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Daniel Henrique Barboza <danielhb413@gmail.com>
To: qemu-devel@nongnu.org
Cc: qemu-ppc@nongnu.org, stefanha@redhat.com,
	"Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>,
	Richard Henderson <richard.henderson@linaro.org>,
	Daniel Henrique Barboza <danielhb413@gmail.com>
Subject: [PULL 13/62] target/ppc: Move VABSDU[BHW] to decodetree and use gvec
Date: Fri, 28 Oct 2022 13:39:02 -0300	[thread overview]
Message-ID: <20221028163951.810456-14-danielhb413@gmail.com> (raw)
In-Reply-To: <20221028163951.810456-1-danielhb413@gmail.com>

From: "Lucas Mateus Castro (alqotel)" <lucas.araujo@eldorado.org.br>

Move VABSDUB, VABSDUH and VABSDUW to decodetree and use gvec to
translate them.

vabsdub:
rept    loop    master             patch
8       12500   0,03601600         0,00688500 (-80.9%)
25      4000    0,03651000         0,00532100 (-85.4%)
100     1000    0,03666900         0,00595300 (-83.8%)
500     200     0,04305800         0,01244600 (-71.1%)
2500    40      0,06893300         0,04273700 (-38.0%)
8000    12      0,14633200         0,12660300 (-13.5%)

vabsduh:
rept    loop    master             patch
8       12500   0,02172400         0,00687500 (-68.4%)
25      4000    0,02154100         0,00531500 (-75.3%)
100     1000    0,02235400         0,00596300 (-73.3%)
500     200     0,02827500         0,01245100 (-56.0%)
2500    40      0,05638400         0,04285500 (-24.0%)
8000    12      0,13166000         0,12641400 (-4.0%)

vabsduw:
rept    loop    master             patch
8       12500   0,01646400         0,00688300 (-58.2%)
25      4000    0,01454500         0,00475500 (-67.3%)
100     1000    0,01545800         0,00511800 (-66.9%)
500     200     0,02168200         0,01114300 (-48.6%)
2500    40      0,04571300         0,04138800 (-9.5%)
8000    12      0,12209500         0,12178500 (-0.3%)

As with VADDCUW and VSUBCUW, there is an overall performance gain, but
the translation uses more TCG ops (4 before the patch, 6 after).

Signed-off-by: Lucas Mateus Castro (alqotel) <lucas.araujo@eldorado.org.br>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <20221019125040.48028-8-lucas.araujo@eldorado.org.br>
Signed-off-by: Daniel Henrique Barboza <danielhb413@gmail.com>
---
 target/ppc/helper.h                 |  6 ++--
 target/ppc/insn32.decode            |  6 ++++
 target/ppc/int_helper.c             | 13 +++-----
 target/ppc/translate/vmx-impl.c.inc | 49 +++++++++++++++++++++++++++--
 target/ppc/translate/vmx-ops.c.inc  |  3 --
 5 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/target/ppc/helper.h b/target/ppc/helper.h
index 71c22efc2e..fd8280dfa7 100644
--- a/target/ppc/helper.h
+++ b/target/ppc/helper.h
@@ -146,9 +146,9 @@ DEF_HELPER_FLAGS_1(ftsqrt, TCG_CALL_NO_RWG_SE, i32, i64)
 DEF_HELPER_FLAGS_4(VAVGUB, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
 DEF_HELPER_FLAGS_4(VAVGUH, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
 DEF_HELPER_FLAGS_4(VAVGUW, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
-DEF_HELPER_FLAGS_3(vabsdub, TCG_CALL_NO_RWG, void, avr, avr, avr)
-DEF_HELPER_FLAGS_3(vabsduh, TCG_CALL_NO_RWG, void, avr, avr, avr)
-DEF_HELPER_FLAGS_3(vabsduw, TCG_CALL_NO_RWG, void, avr, avr, avr)
+DEF_HELPER_FLAGS_4(VABSDUB, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_4(VABSDUH, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
+DEF_HELPER_FLAGS_4(VABSDUW, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
 DEF_HELPER_FLAGS_4(VAVGSB, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
 DEF_HELPER_FLAGS_4(VAVGSH, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
 DEF_HELPER_FLAGS_4(VAVGSW, TCG_CALL_NO_RWG, void, avr, avr, avr, i32)
diff --git a/target/ppc/insn32.decode b/target/ppc/insn32.decode
index 53dd45bbab..1214af7394 100644
--- a/target/ppc/insn32.decode
+++ b/target/ppc/insn32.decode
@@ -528,6 +528,12 @@ VAVGUB          000100 ..... ..... ..... 10000000010    @VX
 VAVGUH          000100 ..... ..... ..... 10001000010    @VX
 VAVGUW          000100 ..... ..... ..... 10010000010    @VX
 
+## Vector Integer Absolute Difference Instructions
+
+VABSDUB         000100 ..... ..... ..... 10000000011    @VX
+VABSDUH         000100 ..... ..... ..... 10001000011    @VX
+VABSDUW         000100 ..... ..... ..... 10010000011    @VX
+
 ## Vector Bit Manipulation Instruction
 
 VGNB            000100 ..... -- ... ..... 10011001100   @VX_n
diff --git a/target/ppc/int_helper.c b/target/ppc/int_helper.c
index bda76e54d4..d97a7f1f28 100644
--- a/target/ppc/int_helper.c
+++ b/target/ppc/int_helper.c
@@ -589,8 +589,8 @@ VAVG(VAVGSW, s32, int64_t)
 VAVG(VAVGUW, u32, uint64_t)
 #undef VAVG
 
-#define VABSDU_DO(name, element)                                        \
-void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
+#define VABSDU(name, element)                                           \
+void helper_##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t v)\
 {                                                                       \
     int i;                                                              \
                                                                         \
@@ -606,12 +606,9 @@ void helper_v##name(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)           \
  *   name    - instruction mnemonic suffix (b: byte, h: halfword, w: word)
  *   element - element type to access from vector
  */
-#define VABSDU(type, element)                   \
-    VABSDU_DO(absdu##type, element)
-VABSDU(b, u8)
-VABSDU(h, u16)
-VABSDU(w, u32)
-#undef VABSDU_DO
+VABSDU(VABSDUB, u8)
+VABSDU(VABSDUH, u16)
+VABSDU(VABSDUW, u32)
 #undef VABSDU
 
 #define VCF(suffix, cvt, element)                                       \
diff --git a/target/ppc/translate/vmx-impl.c.inc b/target/ppc/translate/vmx-impl.c.inc
index 195c601f7a..7741f2eb49 100644
--- a/target/ppc/translate/vmx-impl.c.inc
+++ b/target/ppc/translate/vmx-impl.c.inc
@@ -431,9 +431,6 @@ GEN_VXFORM_V(vminsb, MO_8, tcg_gen_gvec_smin, 1, 12);
 GEN_VXFORM_V(vminsh, MO_16, tcg_gen_gvec_smin, 1, 13);
 GEN_VXFORM_V(vminsw, MO_32, tcg_gen_gvec_smin, 1, 14);
 GEN_VXFORM_V(vminsd, MO_64, tcg_gen_gvec_smin, 1, 15);
-GEN_VXFORM(vabsdub, 1, 16);
-GEN_VXFORM(vabsduh, 1, 17);
-GEN_VXFORM(vabsduw, 1, 18);
 GEN_VXFORM(vmrghb, 6, 0);
 GEN_VXFORM(vmrghh, 6, 1);
 GEN_VXFORM(vmrghw, 6, 2);
@@ -3455,6 +3452,52 @@ TRANS_FLAGS(ALTIVEC, VAVGUB, do_vx_vavg, 0, MO_8)
 TRANS_FLAGS(ALTIVEC, VAVGUH, do_vx_vavg, 0, MO_16)
 TRANS_FLAGS(ALTIVEC, VAVGUW, do_vx_vavg, 0, MO_32)
 
+static void gen_vabsdu(unsigned vece, TCGv_vec t, TCGv_vec a, TCGv_vec b)
+{
+    tcg_gen_umax_vec(vece, t, a, b);
+    tcg_gen_umin_vec(vece, a, a, b);
+    tcg_gen_sub_vec(vece, t, t, a);
+}
+
+static bool do_vabsdu(DisasContext *ctx, arg_VX *a, const int vece)
+{
+    static const TCGOpcode vecop_list[] = {
+        INDEX_op_umax_vec, INDEX_op_umin_vec, INDEX_op_sub_vec, 0
+    };
+
+    static const GVecGen3 op[] = {
+        {
+            .fniv = gen_vabsdu,
+            .fno = gen_helper_VABSDUB,
+            .opt_opc = vecop_list,
+            .vece = MO_8
+        },
+        {
+            .fniv = gen_vabsdu,
+            .fno = gen_helper_VABSDUH,
+            .opt_opc = vecop_list,
+            .vece = MO_16
+        },
+        {
+            .fniv = gen_vabsdu,
+            .fno = gen_helper_VABSDUW,
+            .opt_opc = vecop_list,
+            .vece = MO_32
+        },
+    };
+
+    REQUIRE_VECTOR(ctx);
+
+    tcg_gen_gvec_3(avr_full_offset(a->vrt), avr_full_offset(a->vra),
+                   avr_full_offset(a->vrb), 16, 16, &op[vece]);
+
+    return true;
+}
+
+TRANS_FLAGS2(ISA300, VABSDUB, do_vabsdu, MO_8)
+TRANS_FLAGS2(ISA300, VABSDUH, do_vabsdu, MO_16)
+TRANS_FLAGS2(ISA300, VABSDUW, do_vabsdu, MO_32)
+
 static bool do_vdiv_vmod(DisasContext *ctx, arg_VX *a, const int vece,
                          void (*func_32)(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b),
                          void (*func_64)(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b))
diff --git a/target/ppc/translate/vmx-ops.c.inc b/target/ppc/translate/vmx-ops.c.inc
index 02db51def0..33fec8aca4 100644
--- a/target/ppc/translate/vmx-ops.c.inc
+++ b/target/ppc/translate/vmx-ops.c.inc
@@ -83,9 +83,6 @@ GEN_VXFORM(vminsb, 1, 12),
 GEN_VXFORM(vminsh, 1, 13),
 GEN_VXFORM(vminsw, 1, 14),
 GEN_VXFORM_207(vminsd, 1, 15),
-GEN_VXFORM(vabsdub, 1, 16),
-GEN_VXFORM(vabsduh, 1, 17),
-GEN_VXFORM(vabsduw, 1, 18),
 GEN_VXFORM(vmrghb, 6, 0),
 GEN_VXFORM(vmrghh, 6, 1),
 GEN_VXFORM(vmrghw, 6, 2),
-- 
2.37.3



  parent reply	other threads:[~2022-10-28 16:42 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-10-28 16:38 [PULL 00/62] ppc queue Daniel Henrique Barboza
2022-10-28 16:38 ` [PULL 01/62] target/ppc: fix msgclr/msgsnd insns flags Daniel Henrique Barboza
2022-10-28 16:38 ` [PULL 02/62] target/ppc: fix msgsync " Daniel Henrique Barboza
2022-10-28 16:38 ` [PULL 03/62] target/ppc: fix REQUIRE_HV macro definition Daniel Henrique Barboza
2022-10-28 16:38 ` [PULL 04/62] target/ppc: move msgclr/msgsnd to decodetree Daniel Henrique Barboza
2022-10-28 16:38 ` [PULL 05/62] target/ppc: move msgclrp/msgsndp " Daniel Henrique Barboza
2022-10-28 16:38 ` [PULL 06/62] target/ppc: move msgsync " Daniel Henrique Barboza
2022-10-28 16:38 ` [PULL 07/62] target/ppc: Moved VMLADDUHM to decodetree and use gvec Daniel Henrique Barboza
2022-10-28 16:38 ` [PULL 08/62] target/ppc: Move VMH[R]ADDSHS instruction to decodetree Daniel Henrique Barboza
2022-10-28 16:38 ` [PULL 09/62] target/ppc: Move V(ADD|SUB)CUW to decodetree and use gvec Daniel Henrique Barboza
2022-10-28 16:38 ` [PULL 10/62] target/ppc: Move VNEG[WD] to decodtree " Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 11/62] target/ppc: Move VPRTYB[WDQ] to decodetree " Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 12/62] target/ppc: Move VAVG[SU][BHW] " Daniel Henrique Barboza
2022-10-28 16:39 ` Daniel Henrique Barboza [this message]
2022-10-28 16:39 ` [PULL 14/62] target/ppc: Use gvec to decode XV[N]ABS[DS]P/XVNEG[DS]P Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 15/62] target/ppc: Use gvec to decode XVCPSGN[SD]P Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 16/62] target/ppc: Moved XVTSTDC[DS]P to decodetree Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 17/62] target/ppc: Moved XSTSTDC[QDS]P " Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 18/62] target/ppc: Use gvec to decode XVTSTDC[DS]P Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 19/62] target/ppc: define PPC_INTERRUPT_* values directly Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 20/62] target/ppc: always use ppc_set_irq to set env->pending_interrupts Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 21/62] target/ppc: split interrupt masking and delivery from ppc_hw_interrupt Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 22/62] target/ppc: prepare to split interrupt masking and delivery by excp_model Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 23/62] target/ppc: create an interrupt masking method for POWER9/POWER10 Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 24/62] target/ppc: remove unused interrupts from p9_next_unmasked_interrupt Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 25/62] target/ppc: create an interrupt deliver method for POWER9/POWER10 Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 26/62] target/ppc: remove unused interrupts from p9_deliver_interrupt Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 27/62] target/ppc: remove generic architecture checks " Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 28/62] target/ppc: move power-saving interrupt masking out of cpu_has_work_POWER9 Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 29/62] target/ppc: add power-saving interrupt masking logic to p9_next_unmasked_interrupt Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 30/62] target/ppc: create an interrupt masking method for POWER8 Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 31/62] target/ppc: remove unused interrupts from p8_next_unmasked_interrupt Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 32/62] target/ppc: create an interrupt deliver method for POWER8 Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 33/62] target/ppc: remove unused interrupts from p8_deliver_interrupt Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 34/62] target/ppc: remove generic architecture checks " Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 35/62] target/ppc: move power-saving interrupt masking out of cpu_has_work_POWER8 Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 36/62] target/ppc: add power-saving interrupt masking logic to p8_next_unmasked_interrupt Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 37/62] target/ppc: create an interrupt masking method for POWER7 Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 38/62] target/ppc: remove unused interrupts from p7_next_unmasked_interrupt Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 39/62] target/ppc: create an interrupt deliver method for POWER7 Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 40/62] target/ppc: remove unused interrupts from p7_deliver_interrupt Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 41/62] target/ppc: remove generic architecture checks " Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 42/62] target/ppc: move power-saving interrupt masking out of cpu_has_work_POWER7 Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 43/62] target/ppc: add power-saving interrupt masking logic to p7_next_unmasked_interrupt Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 44/62] target/ppc: remove ppc_store_lpcr from CONFIG_USER_ONLY builds Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 45/62] target/ppc: introduce ppc_maybe_interrupt Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 46/62] target/ppc: unify cpu->has_work based on cs->interrupt_request Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 47/62] target/ppc: move the p*_interrupt_powersave methods to excp_helper.c Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 48/62] ppc440_uc.c: Move DDR2 SDRAM controller model to ppc4xx_sdram.c Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 49/62] ppc4xx_devs.c: Move DDR " Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 50/62] ppc4xx_sdram: Move ppc4xx_sdram_banks() " Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 51/62] ppc4xx_sdram: Use hwaddr for memory bank size Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 52/62] ppc4xx_sdram: Rename local state variable for brevity Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 53/62] ppc4xx_sdram: Generalise bank setup Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 54/62] ppc4xx_sdram: Convert DDR SDRAM controller to new bank handling Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 55/62] ppc4xx_sdram: Add errp parameter to ppc4xx_sdram_banks() Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 56/62] target/ppc: Add new PMC HFLAGS Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 57/62] target/ppc: Increment PMC5 with inline insns Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 58/62] docs/system/ppc/ppce500: Use qemu-system-ppc64 across the board(s) Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 59/62] hw/block/pflash_cfi0{1, 2}: Error out if device length isn't a power of two Daniel Henrique Barboza
2022-11-01 22:23   ` Stefan Hajnoczi
2022-11-01 22:49     ` Philippe Mathieu-Daudé
2022-11-02 19:49       ` Daniel Henrique Barboza
2022-11-08 17:36       ` Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 60/62] hw/sd/sdhci-internal: Unexport ESDHC defines Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 61/62] hw/sd/sdhci: Rename ESDHC_* defines to USDHC_* Daniel Henrique Barboza
2022-10-28 16:39 ` [PULL 62/62] hw/ppc/e500: Implement pflash handling Daniel Henrique Barboza
2022-10-28 20:25 ` [PULL 00/62] ppc queue Daniel Henrique Barboza

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20221028163951.810456-14-danielhb413@gmail.com \
    --to=danielhb413@gmail.com \
    --cc=lucas.araujo@eldorado.org.br \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).