qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: aliguori@amazon.com, aurelien@aurel32.net
Subject: [Qemu-devel] [PATCH 15/20] tcg-ia64: Move bswap for store into tlb load
Date: Thu, 31 Oct 2013 13:22:04 -0700	[thread overview]
Message-ID: <1383250929-10288-16-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1383250929-10288-1-git-send-email-rth@twiddle.net>

Saving at least two cycles per store, and cleaning up the code.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ia64/tcg-target.c | 96 +++++++++++++++++----------------------------------
 1 file changed, 32 insertions(+), 64 deletions(-)

diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index b4bb305..985e213 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1565,9 +1565,11 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
 /* Load and compare a TLB entry, and return the result in (p6, p7).
    R2 is loaded with the address of the addend TLB entry.
    R57 is loaded with the address, zero extented on 32-bit targets.
-   R1, R3 are clobbered. */
+   R1, R3 are clobbered, leaving R56 free for...
+   BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store.  */
 static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
-                                    TCGMemOp s_bits, int off_rw, int off_add)
+                                    TCGMemOp s_bits, int off_rw, int off_add,
+                                    uint64_t bswap1, uint64_t bswap2)
 {
      /*
         .mii
@@ -1615,12 +1617,12 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
                                (TARGET_LONG_BITS == 32
                                 ? OPC_LD4_M3 : OPC_LD8_M3), TCG_REG_R3,
                                TCG_REG_R2, off_add - off_rw),
-                   INSN_NOP_I);
+                   bswap1);
     tcg_out_bundle(s, mmI,
                    INSN_NOP_M,
                    tcg_opc_a6 (TCG_REG_P0, OPC_CMP_EQ_A6, TCG_REG_P6,
                                TCG_REG_P7, TCG_REG_R1, TCG_REG_R3),
-                   INSN_NOP_I);
+                   bswap2);
 }
 
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
@@ -1650,7 +1652,8 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
     /* Read the TLB entry */
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_read),
-                     offsetof(CPUArchState, tlb_table[mem_index][0].addend));
+                     offsetof(CPUArchState, tlb_table[mem_index][0].addend),
+                     INSN_NOP_I, INSN_NOP_I);
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
@@ -1721,17 +1724,31 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
     static const uint64_t opc_st_m4[4] = {
         OPC_ST1_M4, OPC_ST2_M4, OPC_ST4_M4, OPC_ST8_M4
     };
-    int addr_reg, data_reg, mem_index;
+    TCGReg addr_reg, data_reg, store_reg;
+    int mem_index;
+    uint64_t bswap1, bswap2;
     TCGMemOp s_bits;
 
-    data_reg = *args++;
+    store_reg = data_reg = *args++;
     addr_reg = *args++;
     mem_index = *args;
     s_bits = opc & MO_SIZE;
 
+    bswap1 = bswap2 = INSN_NOP_I;
+    if (opc & MO_BSWAP) {
+        store_reg = TCG_REG_R56;
+        bswap1 = tcg_opc_bswap64_i(TCG_REG_P0, store_reg, data_reg);
+        if (s_bits < MO_64) {
+            int shift = 64 - (8 << s_bits);
+            bswap2 = tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11,
+                                 store_reg, store_reg, shift, 63 - shift);
+        }
+    }
+
     tcg_out_qemu_tlb(s, addr_reg, s_bits,
                      offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
-                     offsetof(CPUArchState, tlb_table[mem_index][0].addend));
+                     offsetof(CPUArchState, tlb_table[mem_index][0].addend),
+                     bswap1, bswap2);
 
     /* P6 is the fast path, and P7 the slow path */
     tcg_out_bundle(s, mLX,
@@ -1746,63 +1763,14 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                                TCG_REG_R3, TCG_REG_R57),
                    tcg_opc_i21(TCG_REG_P7, OPC_MOV_I21, TCG_REG_B6,
                                TCG_REG_R3, 0));
-
-    switch (opc) {
-    case MO_8:
-    case MO_16:
-    case MO_32:
-    case MO_64:
-        tcg_out_bundle(s, mii,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       INSN_NOP_I);
-        break;
-
-    case MO_16 | MO_BSWAP:
-        tcg_out_bundle(s, miI,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       INSN_NOP_I,
-                       tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 15, 15));
-        tcg_out_bundle(s, miI,
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       INSN_NOP_I,
-                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
-        data_reg = TCG_REG_R2;
-        break;
-
-    case MO_32 | MO_BSWAP:
-        tcg_out_bundle(s, miI,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       INSN_NOP_I,
-                       tcg_opc_i12(TCG_REG_P6, OPC_DEP_Z_I12,
-                                   TCG_REG_R2, data_reg, 31, 31));
-        tcg_out_bundle(s, miI,
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       INSN_NOP_I,
-                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, TCG_REG_R2));
-        data_reg = TCG_REG_R2;
-        break;
-
-    case MO_64 | MO_BSWAP:
-        tcg_out_bundle(s, miI,
-                       tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
-                                   TCG_REG_R1, TCG_REG_R2),
-                       tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
-                       tcg_opc_bswap64_i(TCG_REG_P6, TCG_REG_R2, data_reg));
-        data_reg = TCG_REG_R2;
-        break;
-
-    default:
-        tcg_abort();
-    }
-
+    tcg_out_bundle(s, mii,
+                   tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1,
+                               TCG_REG_R1, TCG_REG_R2),
+                   tcg_opc_mov_a(TCG_REG_P7, TCG_REG_R58, data_reg),
+                   INSN_NOP_I);
     tcg_out_bundle(s, miB,
-                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
-                               data_reg, TCG_REG_R3),
+                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[s_bits],
+                               store_reg, TCG_REG_R3),
                    tcg_opc_movi_a(TCG_REG_P7, TCG_REG_R59, mem_index),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
-- 
1.8.3.1

  parent reply	other threads:[~2013-10-31 20:22 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-10-31 20:21 [Qemu-devel] [PATCH 00/20] tcg improvements for ia64 Richard Henderson
2013-10-31 20:21 ` [Qemu-devel] [PATCH 01/20] tcg-ia64: Use TCGMemOp within qemu_ldst routines Richard Henderson
2013-11-03 22:55   ` Aurelien Jarno
2013-11-06 22:05     ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 02/20] tcg-ia64: Use shortcuts for nop insns Richard Henderson
2013-11-03 22:55   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 03/20] tcg-ia64: Handle constant calls Richard Henderson
2013-11-03 22:56   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 04/20] tcg-ia64: Simplify brcond Richard Henderson
2013-11-03 22:56   ` Aurelien Jarno
2013-11-03 23:34     ` Richard Henderson
2013-11-04  4:24       ` Richard Henderson
2013-11-06 22:05         ` Aurelien Jarno
2013-11-06 22:04       ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 05/20] tcg-ia64: Move AREG0 to R32 Richard Henderson
2013-11-06 22:33   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 06/20] tcg-ia64: Avoid unnecessary stop bit in tcg_out_alu Richard Henderson
2013-11-06 22:33   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 07/20] tcg-ia64: Use ADDS for small addition Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 08/20] tcg-ia64: Use SUB_A3 and ADDS_A4 for subtraction Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 09/20] tcg-ia64: Use A3 form of logical operations Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:21 ` [Qemu-devel] [PATCH 10/20] tcg-ia64: Introduce tcg_opc_mov_a Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 11/20] tcg-ia64: Introduce tcg_opc_movi_a Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 12/20] tcg-ia64: Introduce tcg_opc_ext_i Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 13/20] tcg-ia64: Introduce tcg_opc_bswap64_i Richard Henderson
2013-11-06 22:34   ` Aurelien Jarno
2013-10-31 20:22 ` [Qemu-devel] [PATCH 14/20] tcg-ia64: Re-bundle the tlb load Richard Henderson
2013-10-31 20:22 ` Richard Henderson [this message]
2013-10-31 20:22 ` [Qemu-devel] [PATCH 16/20] tcg-ia64: Move tlb addend load into tlb read Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 17/20] tcg-ia64: Reduce code duplication in tcg_out_qemu_ld Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 18/20] tcg-ia64: Convert to new ldst helpers Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 19/20] tcg-ia64: Move part of softmmu slow path out of line Richard Henderson
2013-10-31 20:22 ` [Qemu-devel] [PATCH 20/20] tcg-ia64: Optimize small arguments to exit_tb Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1383250929-10288-16-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=aliguori@amazon.com \
    --cc=aurelien@aurel32.net \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).