[Qemu-devel] [PULL 15/23] tcg/ppc: enable dynamic TLB sizing

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: peter.maydell@linaro.org
Subject: [Qemu-devel] [PULL 15/23] tcg/ppc: enable dynamic TLB sizing
Date: Mon, 28 Jan 2019 07:58:59 -0800	[thread overview]
Message-ID: <20190128155907.20607-16-richard.henderson@linaro.org> (raw)
In-Reply-To: <20190128155907.20607-1-richard.henderson@linaro.org>

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 tcg/ppc/tcg-target.h     |  2 +-
 tcg/ppc/tcg-target.inc.c | 91 ++++++++++++++++++++++------------------
 2 files changed, 52 insertions(+), 41 deletions(-)

diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index b51854b5cf..95b735b0bb 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -34,7 +34,7 @@
 #define TCG_TARGET_NB_REGS 32
 #define TCG_TARGET_INSN_UNIT_SIZE 4
 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
-#define TCG_TARGET_IMPLEMENTS_DYN_TLB 0
+#define TCG_TARGET_IMPLEMENTS_DYN_TLB 1
 
 typedef enum {
     TCG_REG_R0,  TCG_REG_R1,  TCG_REG_R2,  TCG_REG_R3,
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 8c1cfdd7ac..773690f1d9 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -327,6 +327,7 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define LHZ    OPCD( 40)
 #define LHA    OPCD( 42)
 #define LWZ    OPCD( 32)
+#define LWZUX  XO31( 55)
 #define STB    OPCD( 38)
 #define STH    OPCD( 44)
 #define STW    OPCD( 36)
@@ -338,6 +339,7 @@ static int tcg_target_const_match(tcg_target_long val, TCGType type,
 #define LD     XO58(  0)
 #define LDX    XO31( 21)
 #define LDU    XO58(  1)
+#define LDUX   XO31( 53)
 #define LWA    XO58(  2)
 #define LWAX   XO31(341)
 
@@ -1503,6 +1505,10 @@ static void * const qemu_st_helpers[16] = {
     [MO_BEQ]  = helper_be_stq_mmu,
 };
 
+/* We expect tlb_mask to be before tlb_table.  */
+QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table) <
+                  offsetof(CPUArchState, tlb_mask));
+
 /* Perform the TLB load and compare.  Places the result of the comparison
    in CR7, loads the addend of the TLB into R3, and returns the register
    containing the guest address (zero-extended into R4).  Clobbers R0 and R2. */
@@ -1513,61 +1519,63 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
 {
     int cmp_off
         = (is_read
-           ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
-           : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
-    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
-    TCGReg base = TCG_AREG0;
+           ? offsetof(CPUTLBEntry, addr_read)
+           : offsetof(CPUTLBEntry, addr_write));
+    int mask_off = offsetof(CPUArchState, tlb_mask[mem_index]);
+    int table_off = offsetof(CPUArchState, tlb_table[mem_index]);
+    TCGReg mask_base = TCG_AREG0, table_base = TCG_AREG0;
     unsigned s_bits = opc & MO_SIZE;
     unsigned a_bits = get_alignment_bits(opc);
 
-    /* Extract the page index, shifted into place for tlb index.  */
-    if (TCG_TARGET_REG_BITS == 64) {
-        if (TARGET_LONG_BITS == 32) {
-            /* Zero-extend the address into a place helpful for further use. */
-            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
-            addrlo = TCG_REG_R4;
-        } else {
-            tcg_out_rld(s, RLDICL, TCG_REG_R3, addrlo,
-                        64 - TARGET_PAGE_BITS, 64 - CPU_TLB_BITS);
+    if (table_off > 0x7fff) {
+        int mask_hi = mask_off - (int16_t)mask_off;
+        int table_hi = table_off - (int16_t)table_off;
+
+        table_base = TCG_REG_R4;
+        if (mask_hi == table_hi) {
+            mask_base = table_base;
+        } else if (mask_hi) {
+            mask_base = TCG_REG_R3;
+            tcg_out32(s, ADDIS | TAI(mask_base, TCG_AREG0, mask_hi >> 16));
         }
+        tcg_out32(s, ADDIS | TAI(table_base, TCG_AREG0, table_hi >> 16));
+        mask_off -= mask_hi;
+        table_off -= table_hi;
     }
 
-    /* Compensate for very large offsets.  */
-    if (add_off >= 0x8000) {
-        int low = (int16_t)cmp_off;
-        int high = cmp_off - low;
-        assert((high & 0xffff) == 0);
-        assert(cmp_off - high == (int16_t)(cmp_off - high));
-        assert(add_off - high == (int16_t)(add_off - high));
-        tcg_out32(s, ADDIS | TAI(TCG_REG_TMP1, base, high >> 16));
-        base = TCG_REG_TMP1;
-        cmp_off -= high;
-        add_off -= high;
-    }
+    /* Load tlb_mask[mmu_idx] and tlb_table[mmu_idx].  */
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, mask_base, mask_off);
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R4, table_base, table_off);
 
-    /* Extraction and shifting, part 2.  */
-    if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) {
-        tcg_out_rlw(s, RLWINM, TCG_REG_R3, addrlo,
-                    32 - (TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS),
-                    32 - (CPU_TLB_BITS + CPU_TLB_ENTRY_BITS),
-                    31 - CPU_TLB_ENTRY_BITS);
+    /* Extract the page index, shifted into place for tlb index.  */
+    if (TCG_TARGET_REG_BITS == 32) {
+        tcg_out_shri32(s, TCG_REG_TMP1, addrlo,
+                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
     } else {
-        tcg_out_shli64(s, TCG_REG_R3, TCG_REG_R3, CPU_TLB_ENTRY_BITS);
+        tcg_out_shri64(s, TCG_REG_TMP1, addrlo,
+                       TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
     }
+    tcg_out32(s, AND | SAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_TMP1));
 
-    tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, base));
-
-    /* Load the tlb comparator.  */
-    if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
-        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
-        tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
+    /* Load the TLB comparator.  */
+    if (cmp_off == 0 && TCG_TARGET_REG_BITS >= TARGET_LONG_BITS) {
+        uint32_t lxu = (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32
+                        ? LWZUX : LDUX);
+        tcg_out32(s, lxu | TAB(TCG_REG_TMP1, TCG_REG_R3, TCG_REG_R4));
     } else {
-        tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
+        tcg_out32(s, ADD | TAB(TCG_REG_R3, TCG_REG_R3, TCG_REG_R4));
+        if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
+            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_REG_R3, cmp_off + 4);
+            tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_R4, TCG_REG_R3, cmp_off);
+        } else {
+            tcg_out_ld(s, TCG_TYPE_TL, TCG_REG_TMP1, TCG_REG_R3, cmp_off);
+        }
     }
 
     /* Load the TLB addend for use on the fast path.  Do this asap
        to minimize any load use delay.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off);
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3,
+               offsetof(CPUTLBEntry, addend));
 
     /* Clear the non-page, non-alignment bits from the address */
     if (TCG_TARGET_REG_BITS == 32) {
@@ -1600,6 +1608,9 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
         if (TARGET_LONG_BITS == 32) {
             tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
                         (32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
+            /* Zero-extend the address for use in the final address.  */
+            tcg_out_ext32u(s, TCG_REG_R4, addrlo);
+            addrlo = TCG_REG_R4;
         } else if (a_bits == 0) {
             tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
         } else {
-- 
2.17.2

next prev parent reply	other threads:[~2019-01-28 15:59 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-01-28 15:58 [Qemu-devel] [PULL 00/23] tcg queued patches Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 01/23] tcg: Add logical simplifications during gvec expand Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 02/23] tcg: Add gvec expanders for nand, nor, eqv Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 03/23] tcg: Add write_aofs to GVecGen4 Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 04/23] tcg: Add opcodes for vector saturated arithmetic Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 05/23] tcg: Add opcodes for vector minmax arithmetic Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 06/23] tcg/i386: Split subroutines out of tcg_expand_vec_op Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 07/23] tcg/i386: Implement vector saturating arithmetic Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 08/23] tcg/i386: Implement vector minmax arithmetic Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 09/23] tcg/aarch64: Implement vector saturating arithmetic Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 10/23] tcg/aarch64: Implement vector minmax arithmetic Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 11/23] cputlb: do not evict empty entries to the vtlb Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 12/23] tcg: introduce dynamic TLB sizing Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 13/23] tcg/i386: enable " Richard Henderson
2019-01-28 15:58 ` [Qemu-devel] [PULL 14/23] tcg/aarch64: " Richard Henderson
2019-01-28 15:58 ` Richard Henderson [this message]
2019-01-28 15:59 ` [Qemu-devel] [PULL 16/23] tcg/sparc: " Richard Henderson
2019-01-28 15:59 ` [Qemu-devel] [PULL 17/23] tcg/s390: " Richard Henderson
2019-01-28 15:59 ` [Qemu-devel] [PULL 18/23] tcg/riscv: " Richard Henderson
2019-01-28 15:59 ` [Qemu-devel] [PULL 19/23] tcg/arm: " Richard Henderson
2019-01-28 15:59 ` [Qemu-devel] [PULL 20/23] tcg/mips: Fix tcg_out_qemu_ld_slow_path Richard Henderson
2019-01-28 15:59 ` [Qemu-devel] [PULL 21/23] tcg/mips: enable dynamic TLB sizing Richard Henderson
2019-01-28 15:59 ` [Qemu-devel] [PULL 22/23] tcg/tci: " Richard Henderson
2019-01-28 15:59 ` [Qemu-devel] [PULL 23/23] cputlb: Remove static tlb sizing Richard Henderson
2019-01-28 18:44 ` [Qemu-devel] [PULL 00/23] tcg queued patches Peter Maydell
2019-01-31 17:53 ` no-reply

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:b51854b5c dfblob:95b735b0b dfblob:8c1cfdd7a dfblob:773690f1d )
 OR (
bs:"[Qemu-devel] [PULL 15/23] tcg/ppc: enable dynamic TLB sizing" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190128155907.20607-16-richard.henderson@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for NNTP newsgroup(s).