qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <rth@twiddle.net>
To: qemu-devel@nongnu.org
Cc: "Vassili Karpov (malc)" <av1474@comtv.ru>,
	Richard Henderson <rth@twiddle.net>
Subject: [Qemu-devel] [PATCH for-next 14/15] tcg-ppc64: Streamline tcg_out_tlb_read
Date: Mon,  5 Aug 2013 08:28:49 -1000	[thread overview]
Message-ID: <1375727330-30515-15-git-send-email-rth@twiddle.net> (raw)
In-Reply-To: <1375727330-30515-1-git-send-email-rth@twiddle.net>

Use less conditional compilation.  Merge the add insn that formed the
host address into the indexed memory load insn.  Load the TLB addend
earlier.  Avoid the update form (LWZU/LDU) of the address load.

Signed-off-by: Richard Henderson <rth@twiddle.net>
---
 tcg/ppc64/tcg-target.c | 202 +++++++++++++++++++++++--------------------------
 1 file changed, 95 insertions(+), 107 deletions(-)

diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 90d033c..4b23597 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -31,13 +31,11 @@
 
 static uint8_t *tb_ret_addr;
 
-#define FAST_PATH
-
 #if TARGET_LONG_BITS == 32
-#define LD_ADDR LWZU
+#define LD_ADDR LWZ
 #define CMP_L 0
 #else
-#define LD_ADDR LDU
+#define LD_ADDR LD
 #define CMP_L (1<<21)
 #endif
 
@@ -854,39 +852,64 @@ static const void * const qemu_st_helpers[4] = {
     helper_stq_mmu,
 };
 
-static void tcg_out_tlb_read(TCGContext *s, TCGReg r0, TCGReg r1, TCGReg r2,
-                             TCGReg addr_reg, int s_bits, int offset)
+/* Perform the TLB load and compare.  Places the result of the comparison
+   in CR7, loads the addend of the TLB into R3, and returns the register
+   containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
+
+static TCGReg tcg_out_tlb_read(TCGContext *s, int s_bits, TCGReg addr_reg,
+                               int mem_index, bool is_read)
 {
-#if TARGET_LONG_BITS == 32
-    tcg_out_ext32u(s, addr_reg, addr_reg);
-
-    tcg_out_rlw(s, RLWINM, r0, addr_reg,
-                32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
-                32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS),
-                31 - CPU_TLB_ENTRY_BITS);
-    tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0));
-    tcg_out32(s, LWZU | TAI(r1, r0, offset));
-    tcg_out_rlw(s, RLWINM, r2, addr_reg, 0,
-                (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
-#else
-    tcg_out_rld (s, RLDICL, r0, addr_reg,
-                 64 - TARGET_PAGE_BITS,
-                 64 - CPU_TLB_BITS);
-    tcg_out_shli64(s, r0, r0, CPU_TLB_ENTRY_BITS);
+    size_t offset
+        = (is_read
+           ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+           : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+
+    /* Extract the page index, shifted into place for tlb index.  */
+    if (TARGET_LONG_BITS == 32) {
+        /* Zero-extend the address into a place helpful for further use.  */
+        tcg_out_ext32u(s, TCG_REG_R4, addr_reg);
+        addr_reg = TCG_REG_R4;
+
+        tcg_out_rlw(s, RLWINM, TCG_REG_R3, addr_reg,
+                    32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
+                    32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS),
+                    31 - CPU_TLB_ENTRY_BITS);
+    } else {
+        tcg_out_rld (s, RLDICL, TCG_REG_R3, addr_reg,
+                     64 - TARGET_PAGE_BITS,
+                     64 - CPU_TLB_BITS);
+        tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS);
+    }
 
-    tcg_out32(s, ADD | TAB(r0, r0, TCG_AREG0));
-    tcg_out32(s, LD_ADDR | TAI(r1, r0, offset));
+    /* Load the tlb comparator.  */
+    tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_AREG0));
+    tcg_out32(s, LD_ADDR | TAI(TCG_REG_R2, TCG_REG_R3, offset));
 
-    if (!s_bits) {
-        tcg_out_rld (s, RLDICR, r2, addr_reg, 0, 63 - TARGET_PAGE_BITS);
-    }
-    else {
-        tcg_out_rld (s, RLDICL, r2, addr_reg,
-                     64 - TARGET_PAGE_BITS,
-                     TARGET_PAGE_BITS - s_bits);
-        tcg_out_rld (s, RLDICL, r2, r2, TARGET_PAGE_BITS, 0);
+    /* Load the TLB addend for use on the fast path.  Do this asap
+       to minimize any load use delay.  */
+    offset = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
+    tcg_out32(s, LD | TAI(TCG_REG_R3, TCG_REG_R3, offset));
+
+    /* Clear the non-page, non-alignment bits from the address.  */
+    if (TARGET_LONG_BITS == 32) {
+        tcg_out_rlw(s, RLWINM, TCG_REG_R0, addr_reg, 0,
+                    (32 - s_bits) & 31, 31 - TARGET_PAGE_BITS);
+    } else {
+        if (!s_bits) {
+            tcg_out_rld (s, RLDICR, TCG_REG_R0, addr_reg,
+                         0, 63 - TARGET_PAGE_BITS);
+        } else {
+            tcg_out_rld (s, RLDICL, TCG_REG_R0, addr_reg,
+                         64 - TARGET_PAGE_BITS,
+                         TARGET_PAGE_BITS - s_bits);
+            tcg_out_rld (s, RLDICL, TCG_REG_R0, TCG_REG_R0,
+                         TARGET_PAGE_BITS, 0);
+        }
     }
-#endif
+
+    tcg_out32(s, CMP | BF(7) | RA(TCG_REG_R0) | RB(TCG_REG_R2) | CMP_L);
+
+    return addr_reg;
 }
 #endif
 
@@ -918,7 +941,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     PowerOpcode insn;
     int s_bits;
 #ifdef CONFIG_SOFTMMU
-    TCGReg r2, ir;
+    TCGReg ir;
     int mem_index;
     void *label1_ptr, *label2_ptr;
 #endif
@@ -930,26 +953,16 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
 #ifdef CONFIG_SOFTMMU
     mem_index = *args;
 
-    r0 = 3;
-    r1 = 4;
-    r2 = 0;
-    rbase = 0;
-
-    tcg_out_tlb_read (s, r0, r1, r2, addr_reg, s_bits,
-                      offsetof (CPUArchState, tlb_table[mem_index][0].addr_read));
-
-    tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1) | CMP_L);
+    r0 = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true);
 
     label1_ptr = s->code_ptr;
-#ifdef FAST_PATH
-    tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
-#endif
+    tcg_out32(s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
 
     /* slow path */
-    ir = 3;
-    tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0);
-    tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg);
-    tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index);
+    ir = TCG_REG_R3;
+    tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg);
+    tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index);
 
     tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[s_bits], 1, LK);
 
@@ -959,29 +972,23 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     } else if (data_reg != 3) {
         tcg_out_mov(s, TCG_TYPE_I64, data_reg, 3);
     }
+
     label2_ptr = s->code_ptr;
-    tcg_out32 (s, B);
+    tcg_out32(s, B);
 
     /* label1: fast path */
-#ifdef FAST_PATH
-    reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr);
-#endif
-
-    /* r0 now contains &env->tlb_table[mem_index][index].addr_read */
-    tcg_out32(s, LD | TAI(r0, r0,
-                          offsetof(CPUTLBEntry, addend)
-                          - offsetof(CPUTLBEntry, addr_read)));
-    /* r0 = env->tlb_table[mem_index][index].addend */
-    tcg_out32(s, ADD | TAB(r0, r0, addr_reg));
-    /* r0 = env->tlb_table[mem_index][index].addend + addr */
+    reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr);
 
+    rbase = TCG_REG_R3;
+    r1 = TCG_REG_R0;
 #else  /* !CONFIG_SOFTMMU */
-#if TARGET_LONG_BITS == 32
-    tcg_out_ext32u(s, addr_reg, addr_reg);
-#endif
-    r0 = addr_reg;
-    r1 = 3;
     rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
+    r0 = addr_reg;
+    r1 = TCG_REG_R0;
+    if (TARGET_LONG_BITS == 32) {
+        r0 = TCG_REG_R2;
+        tcg_out_ext32u(s, r0, addr_reg);
+    }
 #endif
 
     insn = qemu_ldx_opc[opc];
@@ -1000,7 +1007,7 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
     }
 
 #ifdef CONFIG_SOFTMMU
-    reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr);
+    reloc_pc24(label2_ptr, (tcg_target_long)s->code_ptr);
 #endif
 }
 
@@ -1009,7 +1016,7 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
     TCGReg addr_reg, r0, r1, rbase, data_reg;
     PowerOpcode insn;
 #ifdef CONFIG_SOFTMMU
-    TCGReg r2, ir;
+    TCGReg ir;
     int mem_index;
     void *label1_ptr, *label2_ptr;
 #endif
@@ -1020,63 +1027,44 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
 #ifdef CONFIG_SOFTMMU
     mem_index = *args;
 
-    r0 = 3;
-    r1 = 4;
-    r2 = 0;
-    rbase = 0;
-
-    tcg_out_tlb_read (s, r0, r1, r2, addr_reg, opc,
-                      offsetof (CPUArchState, tlb_table[mem_index][0].addr_write));
-
-    tcg_out32 (s, CMP | BF (7) | RA (r2) | RB (r1) | CMP_L);
+    r0 = tcg_out_tlb_read(s, opc, addr_reg, mem_index, false);
 
     label1_ptr = s->code_ptr;
-#ifdef FAST_PATH
-    tcg_out32 (s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
-#endif
+    tcg_out32(s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
 
     /* slow path */
-    ir = 3;
-    tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0);
-    tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg);
-    tcg_out_rld (s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc)));
-    tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index);
+    ir = TCG_REG_R3;
+    tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0);
+    tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg);
+    tcg_out_rld(s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc)));
+    tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index);
 
     tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1, LK);
 
     label2_ptr = s->code_ptr;
-    tcg_out32 (s, B);
+    tcg_out32(s, B);
 
     /* label1: fast path */
-#ifdef FAST_PATH
-    reloc_pc14 (label1_ptr, (tcg_target_long) s->code_ptr);
-#endif
-
-    tcg_out32 (s, (LD
-                   | RT (r0)
-                   | RA (r0)
-                   | (offsetof (CPUTLBEntry, addend)
-                      - offsetof (CPUTLBEntry, addr_write))
-                   ));
-    /* r0 = env->tlb_table[mem_index][index].addend */
-    tcg_out32(s, ADD | TAB(r0, r0, addr_reg));
-    /* r0 = env->tlb_table[mem_index][index].addend + addr */
+    reloc_pc14(label1_ptr, (tcg_target_long) s->code_ptr);
 
+    rbase = TCG_REG_R3;
+    r1 = TCG_REG_R2;
 #else  /* !CONFIG_SOFTMMU */
-#if TARGET_LONG_BITS == 32
-    tcg_out_ext32u(s, addr_reg, addr_reg);
-#endif
-    r1 = 3;
-    r0 = addr_reg;
     rbase = GUEST_BASE ? TCG_GUEST_BASE_REG : 0;
+    r0 = addr_reg;
+    r1 = TCG_REG_R3;
+    if (TARGET_LONG_BITS == 32) {
+        r0 = TCG_REG_R2;
+        tcg_out_ext32u(s, r0, addr_reg);
+    }
 #endif
 
     insn = qemu_stx_opc[opc];
     if (!HAVE_ISA_2_06 && insn == STDBRX) {
         tcg_out32(s, STWBRX | SAB(data_reg, rbase, r0));
         tcg_out32(s, ADDI | TAI(r1, r0, 4));
-        tcg_out_shri64(s, 0, data_reg, 32);
-        tcg_out32(s, STWBRX | SAB(0, rbase, r1));
+        tcg_out_shri64(s, TCG_REG_R0, data_reg, 32);
+        tcg_out32(s, STWBRX | SAB(TCG_REG_R0, rbase, r1));
     } else {
         tcg_out32(s, insn | SAB(data_reg, rbase, r0));
     }
-- 
1.8.3.1

  parent reply	other threads:[~2013-08-05 18:30 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-08-05 18:28 [Qemu-devel] [PATCH for-next 00/15] Collection of improvements for tcg/ppc64 Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 01/15] tcg-ppc64: Avoid code for nop move Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 02/15] tcg-ppc64: Add an LK argument to tcg_out_call Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 03/15] tcg-ppc64: Use the branch absolute instruction when possible Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 04/15] tcg-ppc64: Don't load the static chain from TCG Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 05/15] tcg-ppc64: Look through the function descriptor when profitable Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 06/15] tcg-ppc64: Move AREG0 to r31 Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 07/15] tcg-ppc64: Tidy register allocation order Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 08/15] tcg-ppc64: Create PowerOpcode Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 09/15] tcg-ppc64: Handle long offsets better Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 10/15] tcg-ppc64: Use indirect jump threading Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 11/15] tcg-ppc64: Setup TCG_REG_TB Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 12/15] tcg-ppc64: Use TCG_REG_TB in tcg_out_movi and tcg_out_mem_long Richard Henderson
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 13/15] tcg-ppc64: Tidy tcg_target_qemu_prologue Richard Henderson
2013-08-05 18:28 ` Richard Henderson [this message]
2013-08-05 18:28 ` [Qemu-devel] [PATCH for-next 15/15] tcg-ppc64: Implement CONFIG_QEMU_LDST_OPTIMIZATION Richard Henderson
2013-08-17  6:23 ` [Qemu-devel] [PATCH for-next 00/15] Collection of improvements for tcg/ppc64 Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1375727330-30515-15-git-send-email-rth@twiddle.net \
    --to=rth@twiddle.net \
    --cc=av1474@comtv.ru \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).