[Qemu-devel] [PATCH for-2.3 0/3] Support more than 8 MMU modes, speedup by 10%

qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed

* [Qemu-devel] [PATCH for-2.3 0/3] Support more than 8 MMU modes, speedup by 10%
@ 2015-02-20 12:45 Paolo Bonzini
  2015-02-20 12:45 ` [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS Paolo Bonzini
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Paolo Bonzini @ 2015-02-20 12:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: agraf

Patches 1 and 2 enable support from more than 8 MMU modes in TCG (patch
1 is in the targets, patch 2 is in cpu-defs.h).  The TLB size is reduced
proportionally on targets where that is necessary.

Patch 3 uses the new support in the PPC target.

Paolo

Paolo Bonzini (3):
  tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
  softmmu: support up to 12 MMU modes
  target-ppc: use separate indices for various translation modes

 include/exec/cpu-defs.h  |  34 +++++++++++++++-
 include/exec/cpu_ldst.h  | 104 ++++++++++++++++++++++++++++++++++++++++++++---
 target-ppc/cpu.h         |   7 +---
 target-ppc/excp_helper.c |   3 --
 target-ppc/helper_regs.h |  11 ++---
 tcg/aarch64/tcg-target.h |   1 +
 tcg/arm/tcg-target.h     |   1 +
 tcg/i386/tcg-target.h    |   1 +
 tcg/ia64/tcg-target.h    |   2 +
 tcg/mips/tcg-target.h    |   1 +
 tcg/ppc/tcg-target.h     |   1 +
 tcg/s390/tcg-target.h    |   1 +
 tcg/sparc/tcg-target.h   |   1 +
 tcg/tci/tcg-target.h     |   1 +
 14 files changed, 148 insertions(+), 21 deletions(-)

-- 
2.3.0

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
  2015-02-20 12:45 [Qemu-devel] [PATCH for-2.3 0/3] Support more than 8 MMU modes, speedup by 10% Paolo Bonzini
@ 2015-02-20 12:45 ` Paolo Bonzini
  2015-02-20 12:45 ` [Qemu-devel] [PATCH 2/3] softmmu: support up to 12 MMU modes Paolo Bonzini
  2015-02-20 12:45 ` [Qemu-devel] [PATCH 3/3] target-ppc: use separate indices for various translation modes Paolo Bonzini
  2 siblings, 0 replies; 10+ messages in thread
From: Paolo Bonzini @ 2015-02-20 12:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: agraf

This will be used to size the TLB when more than 8 MMU modes are
used by the target.  Limitations come from the limited size of
the immediate fields (which sometimes, as in the case of Aarch64,
extend to instructions that shift the immediate).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tcg/aarch64/tcg-target.h | 1 +
 tcg/arm/tcg-target.h     | 1 +
 tcg/i386/tcg-target.h    | 1 +
 tcg/ia64/tcg-target.h    | 2 ++
 tcg/mips/tcg-target.h    | 1 +
 tcg/ppc/tcg-target.h     | 1 +
 tcg/s390/tcg-target.h    | 1 +
 tcg/sparc/tcg-target.h   | 1 +
 tcg/tci/tcg-target.h     | 1 +
 9 files changed, 10 insertions(+)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 60c7493..8aec04d 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -14,6 +14,7 @@
 #define TCG_TARGET_AARCH64 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE  4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
 #undef TCG_TARGET_STACK_GROWSUP
 
 typedef enum {
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 1c719e2..6559f80 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -27,6 +27,7 @@
 
 #undef TCG_TARGET_STACK_GROWSUP
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 
 typedef enum {
     TCG_REG_R0 = 0,
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 7a9980e..8ba977a 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -25,6 +25,7 @@
 #define TCG_TARGET_I386 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE  1
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
 
 #ifdef __x86_64__
 # define TCG_TARGET_REG_BITS  64
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index d675589..a04ed81 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -26,6 +26,8 @@
 #define TCG_TARGET_IA64 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE 16
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 21
+
 typedef struct {
     uint64_t lo __attribute__((aligned(16)));
     uint64_t hi;
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index c88a1c9..f5ba52c 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -27,6 +27,7 @@
 #define TCG_TARGET_MIPS 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 #define TCG_TARGET_NB_REGS 32
 
 typedef enum {
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 32ac442..7ce7048 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -32,6 +32,7 @@
 
 #define TCG_TARGET_NB_REGS 32
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 
 typedef enum {
     TCG_REG_R0,  TCG_REG_R1,  TCG_REG_R2,  TCG_REG_R3,
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 5acc28c..91576d5 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -25,6 +25,7 @@
 #define TCG_TARGET_S390 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE 2
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
 
 typedef enum TCGReg {
     TCG_REG_R0 = 0,
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 0c4c8af..f584de4 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -27,6 +27,7 @@
 #define TCG_TARGET_REG_BITS 64
 
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
 #define TCG_TARGET_NB_REGS 32
 
 typedef enum {
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index bd1e974..4c41305 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -44,6 +44,7 @@
 
 #define TCG_TARGET_INTERPRETER 1
 #define TCG_TARGET_INSN_UNIT_SIZE 1
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
 
 #if UINTPTR_MAX == UINT32_MAX
 # define TCG_TARGET_REG_BITS 32
-- 
2.3.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH 2/3] softmmu: support up to 12 MMU modes
  2015-02-20 12:45 [Qemu-devel] [PATCH for-2.3 0/3] Support more than 8 MMU modes, speedup by 10% Paolo Bonzini
  2015-02-20 12:45 ` [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS Paolo Bonzini
@ 2015-02-20 12:45 ` Paolo Bonzini
  2015-02-20 12:45 ` [Qemu-devel] [PATCH 3/3] target-ppc: use separate indices for various translation modes Paolo Bonzini
  2 siblings, 0 replies; 10+ messages in thread
From: Paolo Bonzini @ 2015-02-20 12:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: agraf

At 8k per TLB (for 64-bit host or target), 8 or more modes
make the TLBs bigger than 64k, and some RISC TCG backends do
not like that.  On the affected hosts, cut the TLB size in
half---there is still a measurable speedup on PPC with the
next patch.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/exec/cpu-defs.h |  34 +++++++++++++++-
 include/exec/cpu_ldst.h | 104 +++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 130 insertions(+), 8 deletions(-)

diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 0ca6f0b..2662de5 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -27,6 +27,7 @@
 #include <inttypes.h>
 #include "qemu/osdep.h"
 #include "qemu/queue.h"
+#include "tcg-target.h"
 #ifndef CONFIG_USER_ONLY
 #include "exec/hwaddr.h"
 #endif
@@ -69,8 +70,6 @@ typedef uint64_t target_ulong;
 #define TB_JMP_PAGE_MASK (TB_JMP_CACHE_SIZE - TB_JMP_PAGE_SIZE)
 
 #if !defined(CONFIG_USER_ONLY)
-#define CPU_TLB_BITS 8
-#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
 /* use a fully associative victim tlb of 8 entries */
 #define CPU_VTLB_SIZE 8
 
@@ -80,6 +79,37 @@ typedef uint64_t target_ulong;
 #define CPU_TLB_ENTRY_BITS 5
 #endif
 
+/* TCG_TARGET_TLB_DISPLACEMENT_BITS is used in CPU_TLB_BITS to ensure that
+ * the TLB is not unnecessarily small, but still small enough for the
+ * TLB lookup instruction sequence used by the TCG target.
+ *
+ * TCG will have to generate an operand as large as the distance between
+ * tlb_table[0][0] and the tlb_table[NB_MMU_MODES - 1][0].addend.  For
+ * simplicity, the TCG targets just round everything up to the next
+ * power of two, and count bits.  This works because: 1) the size of each
+ * TLB is a largish power of two, 2) and because the limit of the
+ * displacement is really close to a power of two.
+ *
+ * For example, the maximum displacement 0xFFF0 on PPC and MIPS, but TCG
+ * just says "the displacement is 16 bits".  TCG_TARGET_TLB_DISPLACEMENT_BITS
+ * then ensures that tlb_table at least 0x8000 bytes large ("not unnecessarily
+ * small": 2^15).  The operand then will come up smaller than 0xFFF0 without
+ * any particular care, because the TLB for a single MMU mode is larger than
+ * 0x10000-0xFFF0=16 bytes.  In the end, the maximum value of the operand
+ * will be something like 0xC018; here, 0xC000 is the offset of the last TLB
+ * table (tlb_table[NB_MMU_MODES - 1][0]), and 0x18 is the offset of the
+ * addend field in each TLB entry.
+ */
+#define CPU_TLB_BITS                                             \
+    MIN(8,                                                       \
+        TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS -  \
+        (NB_MMU_MODES <= 1 ? 0 :                                 \
+         NB_MMU_MODES <= 2 ? 1 :                                 \
+         NB_MMU_MODES <= 4 ? 2 :                                 \
+         NB_MMU_MODES <= 8 ? 3 : 4))
+
+#define CPU_TLB_SIZE (1 << CPU_TLB_BITS)
+
 typedef struct CPUTLBEntry {
     /* bit TARGET_LONG_BITS to TARGET_PAGE_BITS : virtual address
        bit TARGET_PAGE_BITS-1..4  : Nonzero for accesses that should not
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index 1673287..0ec398c 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -263,12 +263,104 @@ uint64_t helper_ldq_cmmu(CPUArchState *env, target_ulong addr, int mmu_idx);
 #undef MEMSUFFIX
 #endif /* (NB_MMU_MODES >= 7) */
 
-#if (NB_MMU_MODES > 7)
-/* Note that supporting NB_MMU_MODES == 9 would require
- * changes to at least the ARM TCG backend.
- */
-#error "NB_MMU_MODES > 7 is not supported for now"
-#endif /* (NB_MMU_MODES > 7) */
+#if (NB_MMU_MODES >= 8) && defined(MMU_MODE7_SUFFIX)
+
+#define CPU_MMU_INDEX 7
+#define MEMSUFFIX MMU_MODE7_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 8) */
+
+#if (NB_MMU_MODES >= 9) && defined(MMU_MODE8_SUFFIX)
+
+#define CPU_MMU_INDEX 8
+#define MEMSUFFIX MMU_MODE8_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 9) */
+
+#if (NB_MMU_MODES >= 10) && defined(MMU_MODE9_SUFFIX)
+
+#define CPU_MMU_INDEX 9
+#define MEMSUFFIX MMU_MODE9_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 10) */
+
+#if (NB_MMU_MODES >= 11) && defined(MMU_MODE10_SUFFIX)
+
+#define CPU_MMU_INDEX 10
+#define MEMSUFFIX MMU_MODE10_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 11) */
+
+#if (NB_MMU_MODES >= 12) && defined(MMU_MODE11_SUFFIX)
+
+#define CPU_MMU_INDEX 11
+#define MEMSUFFIX MMU_MODE11_SUFFIX
+#define DATA_SIZE 1
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 2
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 4
+#include "exec/cpu_ldst_template.h"
+
+#define DATA_SIZE 8
+#include "exec/cpu_ldst_template.h"
+#undef CPU_MMU_INDEX
+#undef MEMSUFFIX
+#endif /* (NB_MMU_MODES >= 12) */
+
+#if (NB_MMU_MODES > 12)
+#error "NB_MMU_MODES > 12 is not supported for now"
+#endif /* (NB_MMU_MODES > 12) */
 
 /* these access are slower, they must be as rare as possible */
 #define CPU_MMU_INDEX (cpu_mmu_index(env))
-- 
2.3.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH 3/3] target-ppc: use separate indices for various translation modes
  2015-02-20 12:45 [Qemu-devel] [PATCH for-2.3 0/3] Support more than 8 MMU modes, speedup by 10% Paolo Bonzini
  2015-02-20 12:45 ` [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS Paolo Bonzini
  2015-02-20 12:45 ` [Qemu-devel] [PATCH 2/3] softmmu: support up to 12 MMU modes Paolo Bonzini
@ 2015-02-20 12:45 ` Paolo Bonzini
  2015-02-20 13:00   ` Alexander Graf
  2 siblings, 1 reply; 10+ messages in thread
From: Paolo Bonzini @ 2015-02-20 12:45 UTC (permalink / raw)
  To: qemu-devel; +Cc: agraf

PowerPC TCG flushes the TLB on every IR/DR change, which basically
means on every user<->kernel context switch.  Encode IR/DR in the
MMU index.

This brings the number of TLB flushes down from ~900000 to ~50000
for starting up the Debian installer, which is in line with x86
and gives a ~10% performance improvement.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-ppc/cpu.h         |  7 ++-----
 target-ppc/excp_helper.c |  3 ---
 target-ppc/helper_regs.h | 11 ++++++-----
 3 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index aae33a9..610d884 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -943,7 +943,8 @@ struct ppc_segment_page_sizes {
 
 /*****************************************************************************/
 /* The whole PowerPC CPU context */
-#define NB_MMU_MODES 3
+#define NB_MMU_MODES 12
+#define MMU_USER_IDX 3  /* PR=IR=DR=1 */
 
 #define PPC_CPU_OPCODES_LEN          0x40
 #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
@@ -1252,10 +1253,6 @@ static inline CPUPPCState *cpu_init(const char *cpu_model)
 #define cpu_list ppc_cpu_list
 
 /* MMU modes definitions */
-#define MMU_MODE0_SUFFIX _user
-#define MMU_MODE1_SUFFIX _kernel
-#define MMU_MODE2_SUFFIX _hypv
-#define MMU_USER_IDX 0
 static inline int cpu_mmu_index (CPUPPCState *env)
 {
     return env->mmu_idx;
diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
index b803475..f608701 100644
--- a/target-ppc/excp_helper.c
+++ b/target-ppc/excp_helper.c
@@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
 
     if (env->spr[SPR_LPCR] & LPCR_AIL) {
         new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
-    } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) {
-        /* If we disactivated any translation, flush TLBs */
-        tlb_flush(cs, 1);
     }
 
 #ifdef TARGET_PPC64
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 271fddf..23b8ded 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -41,12 +41,15 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
 
 static inline void hreg_compute_mem_idx(CPUPPCState *env)
 {
+    int high;
+
     /* Precompute MMU index */
     if (msr_pr == 0 && msr_hv != 0) {
-        env->mmu_idx = 2;
+        high = 2;
     } else {
-        env->mmu_idx = 1 - msr_pr;
+        high = 1 - msr_pr;
     }
+    env->mmu_idx = (high << 2) | (msr_ir << 1) | msr_dr;
 }
 
 static inline void hreg_compute_hflags(CPUPPCState *env)
@@ -56,7 +59,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env)
     /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */
     hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) |
         (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) |
-        (1 << MSR_LE) | (1 << MSR_VSX);
+        (1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR);
     hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB;
     hreg_compute_mem_idx(env);
     env->hflags = env->msr & hflags_mask;
@@ -82,8 +85,6 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
     }
     if (((value >> MSR_IR) & 1) != msr_ir ||
         ((value >> MSR_DR) & 1) != msr_dr) {
-        /* Flush all tlb when changing translation mode */
-        tlb_flush(cs, 1);
         excp = POWERPC_EXCP_NONE;
         cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
     }
-- 
2.3.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3] target-ppc: use separate indices for various translation modes
  2015-02-20 12:45 ` [Qemu-devel] [PATCH 3/3] target-ppc: use separate indices for various translation modes Paolo Bonzini
@ 2015-02-20 13:00   ` Alexander Graf
  2015-02-20 17:58     ` Paolo Bonzini
  0 siblings, 1 reply; 10+ messages in thread
From: Alexander Graf @ 2015-02-20 13:00 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel



On 20.02.15 13:45, Paolo Bonzini wrote:
> PowerPC TCG flushes the TLB on every IR/DR change, which basically
> means on every user<->kernel context switch.  Encode IR/DR in the
> MMU index.
> 
> This brings the number of TLB flushes down from ~900000 to ~50000
> for starting up the Debian installer, which is in line with x86
> and gives a ~10% performance improvement.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  target-ppc/cpu.h         |  7 ++-----
>  target-ppc/excp_helper.c |  3 ---
>  target-ppc/helper_regs.h | 11 ++++++-----
>  3 files changed, 8 insertions(+), 13 deletions(-)
> 
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index aae33a9..610d884 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -943,7 +943,8 @@ struct ppc_segment_page_sizes {
>  
>  /*****************************************************************************/
>  /* The whole PowerPC CPU context */
> -#define NB_MMU_MODES 3
> +#define NB_MMU_MODES 12
> +#define MMU_USER_IDX 3  /* PR=IR=DR=1 */
>  
>  #define PPC_CPU_OPCODES_LEN          0x40
>  #define PPC_CPU_INDIRECT_OPCODES_LEN 0x20
> @@ -1252,10 +1253,6 @@ static inline CPUPPCState *cpu_init(const char *cpu_model)
>  #define cpu_list ppc_cpu_list
>  
>  /* MMU modes definitions */
> -#define MMU_MODE0_SUFFIX _user
> -#define MMU_MODE1_SUFFIX _kernel
> -#define MMU_MODE2_SUFFIX _hypv
> -#define MMU_USER_IDX 0
>  static inline int cpu_mmu_index (CPUPPCState *env)
>  {
>      return env->mmu_idx;
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index b803475..f608701 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>  
>      if (env->spr[SPR_LPCR] & LPCR_AIL) {
>          new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
> -    } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) {
> -        /* If we disactivated any translation, flush TLBs */
> -        tlb_flush(cs, 1);
>      }
>  
>  #ifdef TARGET_PPC64
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 271fddf..23b8ded 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -41,12 +41,15 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
>  
>  static inline void hreg_compute_mem_idx(CPUPPCState *env)
>  {
> +    int high;
> +
>      /* Precompute MMU index */
>      if (msr_pr == 0 && msr_hv != 0) {
> -        env->mmu_idx = 2;
> +        high = 2;

Could you instead do something like

  uint32_t mmu_idx = 0;

  mmu_idx |= MMU_IDX_HV;

>      } else {
> -        env->mmu_idx = 1 - msr_pr;
> +        high = 1 - msr_pr;

  mmu_idx |= (msr_pr & 1) ? MMU_IDX_PR;

>      }
> +    env->mmu_idx = (high << 2) | (msr_ir << 1) | msr_dr;

  mmu_idx |= (msr_ir & 1) ? MMU_IDX_IR;
  mmu_idx |= (msr_dr & 1) ? MMU_IDX_DR;
  env->mmu_idx = mmu_idx;

and check whether the compiler is smart enough to optimize this out
considering that it's all constants?

Also please double-check that 440 still works. That was the target that
gave me the most headaches on DR/IR switching so far.

Otherwise looks simple and clean to me :).


Alex

>  }
>  
>  static inline void hreg_compute_hflags(CPUPPCState *env)
> @@ -56,7 +59,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env)
>      /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */
>      hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) |
>          (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) |
> -        (1 << MSR_LE) | (1 << MSR_VSX);
> +        (1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR);
>      hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB;
>      hreg_compute_mem_idx(env);
>      env->hflags = env->msr & hflags_mask;
> @@ -82,8 +85,6 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
>      }
>      if (((value >> MSR_IR) & 1) != msr_ir ||
>          ((value >> MSR_DR) & 1) != msr_dr) {
> -        /* Flush all tlb when changing translation mode */
> -        tlb_flush(cs, 1);
>          excp = POWERPC_EXCP_NONE;
>          cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
>      }
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH 3/3] target-ppc: use separate indices for various translation modes
  2015-02-20 13:00   ` Alexander Graf
@ 2015-02-20 17:58     ` Paolo Bonzini
  0 siblings, 0 replies; 10+ messages in thread
From: Paolo Bonzini @ 2015-02-20 17:58 UTC (permalink / raw)
  To: Alexander Graf, qemu-devel



On 20/02/2015 14:00, Alexander Graf wrote:
> Also please double-check that 440 still works. That was the target that
> gave me the most headaches on DR/IR switching so far.

The ppc-virtexml507-linux-2_6_34.tgz image works for me.

Paolo

> Otherwise looks simple and clean to me :).
> 
> 
> Alex
> 
>>  }
>>  
>>  static inline void hreg_compute_hflags(CPUPPCState *env)
>> @@ -56,7 +59,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env)
>>      /* We 'forget' FE0 & FE1: we'll never generate imprecise exceptions */
>>      hflags_mask = (1 << MSR_VR) | (1 << MSR_AP) | (1 << MSR_SA) |
>>          (1 << MSR_PR) | (1 << MSR_FP) | (1 << MSR_SE) | (1 << MSR_BE) |
>> -        (1 << MSR_LE) | (1 << MSR_VSX);
>> +        (1 << MSR_LE) | (1 << MSR_VSX) | (1 << MSR_IR) | (1 << MSR_DR);
>>      hflags_mask |= (1ULL << MSR_CM) | (1ULL << MSR_SF) | MSR_HVB;
>>      hreg_compute_mem_idx(env);
>>      env->hflags = env->msr & hflags_mask;
>> @@ -82,8 +85,6 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
>>      }
>>      if (((value >> MSR_IR) & 1) != msr_ir ||
>>          ((value >> MSR_DR) & 1) != msr_dr) {
>> -        /* Flush all tlb when changing translation mode */
>> -        tlb_flush(cs, 1);
>>          excp = POWERPC_EXCP_NONE;
>>          cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
>>      }
>>
> 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH v2 for-2.3 0/3] Support more than 8 MMU modes, speedup PPC by 10%
@ 2015-02-20 17:57 Paolo Bonzini
  2015-02-20 17:57 ` [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS Paolo Bonzini
  0 siblings, 1 reply; 10+ messages in thread
From: Paolo Bonzini @ 2015-02-20 17:57 UTC (permalink / raw)
  To: qemu-devel; +Cc: agraf

Patches 1 and 2 enable support from more than 8 MMU modes in TCG (patch
1 is in the targets, patch 2 is in cpu-defs.h).  The TLB size is reduced
proportionally on targets where that is necessary.

Patch 3 uses the new support in the PPC target.

Paolo

Paolo Bonzini (3):
  tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
  softmmu: support up to 12 MMU modes
  target-ppc: use separate indices for various translation modes

 include/exec/cpu-defs.h  |  34 +++++++++++++++-
 include/exec/cpu_ldst.h  | 104 ++++++++++++++++++++++++++++++++++++++++++++---
 target-ppc/cpu.h         |  12 +++---
 target-ppc/excp_helper.c |   3 --
 target-ppc/helper_regs.h |  15 ++++---
 tcg/aarch64/tcg-target.h |   1 +
 tcg/arm/tcg-target.h     |   1 +
 tcg/i386/tcg-target.h    |   1 +
 tcg/ia64/tcg-target.h    |   2 +
 tcg/mips/tcg-target.h    |   1 +
 tcg/ppc/tcg-target.h     |   1 +
 tcg/s390/tcg-target.h    |   1 +
 tcg/sparc/tcg-target.h   |   1 +
 tcg/tci/tcg-target.h     |   1 +
 14 files changed, 156 insertions(+), 22 deletions(-)

-- 
2.3.0

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
  2015-02-20 17:57 [Qemu-devel] [PATCH v2 for-2.3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Paolo Bonzini
@ 2015-02-20 17:57 ` Paolo Bonzini
  2015-02-20 19:36   ` Richard Henderson
  0 siblings, 1 reply; 10+ messages in thread
From: Paolo Bonzini @ 2015-02-20 17:57 UTC (permalink / raw)
  To: qemu-devel; +Cc: agraf

This will be used to size the TLB when more than 8 MMU modes are
used by the target.  Limitations come from the limited size of
the immediate fields (which sometimes, as in the case of Aarch64,
extend to instructions that shift the immediate).

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tcg/aarch64/tcg-target.h | 1 +
 tcg/arm/tcg-target.h     | 1 +
 tcg/i386/tcg-target.h    | 1 +
 tcg/ia64/tcg-target.h    | 2 ++
 tcg/mips/tcg-target.h    | 1 +
 tcg/ppc/tcg-target.h     | 1 +
 tcg/s390/tcg-target.h    | 1 +
 tcg/sparc/tcg-target.h   | 1 +
 tcg/tci/tcg-target.h     | 1 +
 9 files changed, 10 insertions(+)

diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
index 60c7493..8aec04d 100644
--- a/tcg/aarch64/tcg-target.h
+++ b/tcg/aarch64/tcg-target.h
@@ -14,6 +14,7 @@
 #define TCG_TARGET_AARCH64 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE  4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 24
 #undef TCG_TARGET_STACK_GROWSUP
 
 typedef enum {
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 1c719e2..6559f80 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -27,6 +27,7 @@
 
 #undef TCG_TARGET_STACK_GROWSUP
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 
 typedef enum {
     TCG_REG_R0 = 0,
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 7a9980e..8ba977a 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -25,6 +25,7 @@
 #define TCG_TARGET_I386 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE  1
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
 
 #ifdef __x86_64__
 # define TCG_TARGET_REG_BITS  64
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index d675589..a04ed81 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -26,6 +26,8 @@
 #define TCG_TARGET_IA64 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE 16
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 21
+
 typedef struct {
     uint64_t lo __attribute__((aligned(16)));
     uint64_t hi;
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index c88a1c9..f5ba52c 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -27,6 +27,7 @@
 #define TCG_TARGET_MIPS 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 #define TCG_TARGET_NB_REGS 32
 
 typedef enum {
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 32ac442..7ce7048 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -32,6 +32,7 @@
 
 #define TCG_TARGET_NB_REGS 32
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
 
 typedef enum {
     TCG_REG_R0,  TCG_REG_R1,  TCG_REG_R2,  TCG_REG_R3,
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index 5acc28c..91576d5 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -25,6 +25,7 @@
 #define TCG_TARGET_S390 1
 
 #define TCG_TARGET_INSN_UNIT_SIZE 2
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 19
 
 typedef enum TCGReg {
     TCG_REG_R0 = 0,
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index 0c4c8af..f584de4 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -27,6 +27,7 @@
 #define TCG_TARGET_REG_BITS 64
 
 #define TCG_TARGET_INSN_UNIT_SIZE 4
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
 #define TCG_TARGET_NB_REGS 32
 
 typedef enum {
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index bd1e974..4c41305 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -44,6 +44,7 @@
 
 #define TCG_TARGET_INTERPRETER 1
 #define TCG_TARGET_INSN_UNIT_SIZE 1
+#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
 
 #if UINTPTR_MAX == UINT32_MAX
 # define TCG_TARGET_REG_BITS 32
-- 
2.3.0

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
  2015-02-20 17:57 ` [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS Paolo Bonzini
@ 2015-02-20 19:36   ` Richard Henderson
  2015-02-23 15:21     ` Alexander Graf
  0 siblings, 1 reply; 10+ messages in thread
From: Richard Henderson @ 2015-02-20 19:36 UTC (permalink / raw)
  To: Paolo Bonzini, qemu-devel; +Cc: agraf

On 02/20/2015 09:57 AM, Paolo Bonzini wrote:
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index 7a9980e..8ba977a 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -25,6 +25,7 @@
>  #define TCG_TARGET_I386 1
>  
>  #define TCG_TARGET_INSN_UNIT_SIZE  1
> +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32

31.

Positive displacements only in 64-bit mode.  I don't think it's worth
conditionalizing this for 32-bit, since we can't actually allocate 2G of TLBs
and then actually accomplish anything.  ;-)

> diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
> index c88a1c9..f5ba52c 100644
> --- a/tcg/mips/tcg-target.h
> +++ b/tcg/mips/tcg-target.h
> @@ -27,6 +27,7 @@
>  #define TCG_TARGET_MIPS 1
>  
>  #define TCG_TARGET_INSN_UNIT_SIZE 4
> +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
>  #define TCG_TARGET_NB_REGS 32
>  
>  typedef enum {
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index 32ac442..7ce7048 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -32,6 +32,7 @@
>  
>  #define TCG_TARGET_NB_REGS 32
>  #define TCG_TARGET_INSN_UNIT_SIZE 4
> +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16

Close enough, since the BUILD_BUG_ON should still catch this out if somehow the
size is within that last 16 bytes of 64k.


r~

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
  2015-02-20 19:36   ` Richard Henderson
@ 2015-02-23 15:21     ` Alexander Graf
  2015-02-23 16:03       ` Richard Henderson
  0 siblings, 1 reply; 10+ messages in thread
From: Alexander Graf @ 2015-02-23 15:21 UTC (permalink / raw)
  To: Richard Henderson, Paolo Bonzini, qemu-devel



On 20.02.15 20:36, Richard Henderson wrote:
> On 02/20/2015 09:57 AM, Paolo Bonzini wrote:
>> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
>> index 7a9980e..8ba977a 100644
>> --- a/tcg/i386/tcg-target.h
>> +++ b/tcg/i386/tcg-target.h
>> @@ -25,6 +25,7 @@
>>  #define TCG_TARGET_I386 1
>>  
>>  #define TCG_TARGET_INSN_UNIT_SIZE  1
>> +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 32
> 
> 31.
> 
> Positive displacements only in 64-bit mode.  I don't think it's worth
> conditionalizing this for 32-bit, since we can't actually allocate 2G of TLBs
> and then actually accomplish anything.  ;-)

I suppose Paolo is already off to the hospital ;). Richard, if I just
s/32/31/ in the hunk above, does that mean you ack the patch?


Alex

> 
>> diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
>> index c88a1c9..f5ba52c 100644
>> --- a/tcg/mips/tcg-target.h
>> +++ b/tcg/mips/tcg-target.h
>> @@ -27,6 +27,7 @@
>>  #define TCG_TARGET_MIPS 1
>>  
>>  #define TCG_TARGET_INSN_UNIT_SIZE 4
>> +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
>>  #define TCG_TARGET_NB_REGS 32
>>  
>>  typedef enum {
>> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
>> index 32ac442..7ce7048 100644
>> --- a/tcg/ppc/tcg-target.h
>> +++ b/tcg/ppc/tcg-target.h
>> @@ -32,6 +32,7 @@
>>  
>>  #define TCG_TARGET_NB_REGS 32
>>  #define TCG_TARGET_INSN_UNIT_SIZE 4
>> +#define TCG_TARGET_TLB_DISPLACEMENT_BITS 16
> 
> Close enough, since the BUILD_BUG_ON should still catch this out if somehow the
> size is within that last 16 bytes of 64k.
> 
> 
> r~
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS
  2015-02-23 15:21     ` Alexander Graf
@ 2015-02-23 16:03       ` Richard Henderson
  0 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2015-02-23 16:03 UTC (permalink / raw)
  To: Alexander Graf, Paolo Bonzini, qemu-devel

On 02/23/2015 05:21 AM, Alexander Graf wrote:
> I suppose Paolo is already off to the hospital ;). Richard, if I just
> s/32/31/ in the hunk above, does that mean you ack the patch?

Yep.


r~

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-02-23 16:03 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-02-20 12:45 [Qemu-devel] [PATCH for-2.3 0/3] Support more than 8 MMU modes, speedup by 10% Paolo Bonzini
2015-02-20 12:45 ` [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS Paolo Bonzini
2015-02-20 12:45 ` [Qemu-devel] [PATCH 2/3] softmmu: support up to 12 MMU modes Paolo Bonzini
2015-02-20 12:45 ` [Qemu-devel] [PATCH 3/3] target-ppc: use separate indices for various translation modes Paolo Bonzini
2015-02-20 13:00   ` Alexander Graf
2015-02-20 17:58     ` Paolo Bonzini
  -- strict thread matches above, loose matches on Subject: below --
2015-02-20 17:57 [Qemu-devel] [PATCH v2 for-2.3 0/3] Support more than 8 MMU modes, speedup PPC by 10% Paolo Bonzini
2015-02-20 17:57 ` [Qemu-devel] [PATCH 1/3] tcg: add TCG_TARGET_TLB_DISPLACEMENT_BITS Paolo Bonzini
2015-02-20 19:36   ` Richard Henderson
2015-02-23 15:21     ` Alexander Graf
2015-02-23 16:03       ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).