* [Qemu-devel] [RFC/PATCH] ppc: Batch TLB flushes on 32-bit 6xx/7xx/7xxx in hash mode
@ 2016-06-06 10:23 Benjamin Herrenschmidt
2016-06-06 11:13 ` Cédric Le Goater
2016-06-06 22:36 ` Mark Cave-Ayland
0 siblings, 2 replies; 4+ messages in thread
From: Benjamin Herrenschmidt @ 2016-06-06 10:23 UTC (permalink / raw)
To: qemu-ppc
Cc: qemu-devel, David Gibson, Cédric Le Goater, Mark Cave-Ayland
This ports the existing 64-bit mechanism to 32-bit, so that a series
of 64 tlbie's followed by a sync, as some versions of Darwin
(ab)use, will result in a single flush.
We apply a pending flush on any sync instruction though, as Darwin
doesn't use tlbsync on non-SMP systems.
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
Note: I haven't done any performance impact measurements with this
one ... feel free to let me know what it does for you :-)
target-ppc/cpu.h | 2 +-
target-ppc/helper_regs.h | 2 +-
target-ppc/mmu_helper.c | 44 ++++++++------------------------------------
target-ppc/translate.c | 27 +++++++++++++++++++++------
4 files changed, 31 insertions(+), 44 deletions(-)
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index d8f8f7e..c2962d7 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -959,7 +959,6 @@ struct CPUPPCState {
ppc_slb_t slb[MAX_SLB_ENTRIES];
int32_t slb_nr;
/* tcg TLB needs flush (deferred slb inval instruction typically) */
- uint32_t tlb_need_flush;
#endif
/* segment registers */
hwaddr htab_base;
@@ -985,6 +984,7 @@ struct CPUPPCState {
target_ulong pb[4];
bool tlb_dirty; /* Set to non-zero when modifying TLB */
bool kvm_sw_tlb; /* non-zero if KVM SW TLB API is active */
+ uint32_t tlb_need_flush; /* Delayed flush needed */
#endif
/* Other registers */
diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
index 104b690..8fc0934 100644
--- a/target-ppc/helper_regs.h
+++ b/target-ppc/helper_regs.h
@@ -151,7 +151,7 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
return excp;
}
-#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
+#if !defined(CONFIG_USER_ONLY)
static inline void check_tlb_flush(CPUPPCState *env)
{
CPUState *cs = CPU(ppc_env_get_cpu(env));
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index a5e3878..485d5b8 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -1935,8 +1935,8 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
case POWERPC_MMU_2_06a:
case POWERPC_MMU_2_07:
case POWERPC_MMU_2_07a:
- env->tlb_need_flush = 0;
#endif /* defined(TARGET_PPC64) */
+ env->tlb_need_flush = 0;
tlb_flush(CPU(cpu), 1);
break;
default:
@@ -1949,9 +1949,6 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
{
#if !defined(FLUSH_ALL_TLBS)
- PowerPCCPU *cpu = ppc_env_get_cpu(env);
- CPUState *cs;
-
addr &= TARGET_PAGE_MASK;
switch (env->mmu_model) {
case POWERPC_MMU_SOFT_6xx:
@@ -1963,36 +1960,12 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
break;
case POWERPC_MMU_32B:
case POWERPC_MMU_601:
- /* tlbie invalidate TLBs for all segments */
- addr &= ~((target_ulong)-1ULL << 28);
- cs = CPU(cpu);
- /* XXX: this case should be optimized,
- * giving a mask to tlb_flush_page
- */
- /* This is broken, some CPUs invalidate a whole congruence
- * class on an even smaller subset of bits and some OSes take
- * advantage of this. Just blow the whole thing away.
+ /* Actual CPUs invalidate entire congruence classes based on the
+ * geometry of their TLBs and some OSes take that into account,
+ * we just mark the TLB to be flushed later (context synchronizing
+ * event or sync instruction on 32-bit).
*/
-#if 0
- tlb_flush_page(cs, addr | (0x0 << 28));
- tlb_flush_page(cs, addr | (0x1 << 28));
- tlb_flush_page(cs, addr | (0x2 << 28));
- tlb_flush_page(cs, addr | (0x3 << 28));
- tlb_flush_page(cs, addr | (0x4 << 28));
- tlb_flush_page(cs, addr | (0x5 << 28));
- tlb_flush_page(cs, addr | (0x6 << 28));
- tlb_flush_page(cs, addr | (0x7 << 28));
- tlb_flush_page(cs, addr | (0x8 << 28));
- tlb_flush_page(cs, addr | (0x9 << 28));
- tlb_flush_page(cs, addr | (0xA << 28));
- tlb_flush_page(cs, addr | (0xB << 28));
- tlb_flush_page(cs, addr | (0xC << 28));
- tlb_flush_page(cs, addr | (0xD << 28));
- tlb_flush_page(cs, addr | (0xE << 28));
- tlb_flush_page(cs, addr | (0xF << 28));
-#else
- tlb_flush(cs, 1);
-#endif
+ env->tlb_need_flush = 1;
break;
#if defined(TARGET_PPC64)
case POWERPC_MMU_64B:
@@ -2058,13 +2031,12 @@ target_ulong helper_load_sr(CPUPPCState *env, target_ulong sr_num)
void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value)
{
- PowerPCCPU *cpu = ppc_env_get_cpu(env);
-
qemu_log_mask(CPU_LOG_MMU,
"%s: reg=%d " TARGET_FMT_lx " " TARGET_FMT_lx "\n", __func__,
(int)srnum, value, env->sr[srnum]);
#if defined(TARGET_PPC64)
if (env->mmu_model & POWERPC_MMU_64) {
+ PowerPCCPU *cpu = ppc_env_get_cpu(env);
uint64_t esid, vsid;
/* ESID = srnum */
@@ -2093,7 +2065,7 @@ void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value)
}
}
#else
- tlb_flush(CPU(cpu), 1);
+ env->tlb_need_flush = 1;
#endif
}
}
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 7763431..ab5862f 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -193,6 +193,7 @@ struct DisasContext {
uint32_t exception;
/* Routine used to access memory */
bool pr, hv;
+ bool lazy_tlb_flush;
int mem_idx;
int access_type;
/* Translation flags */
@@ -3290,12 +3291,17 @@ static void gen_eieio(DisasContext *ctx)
{
}
-#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
+#if !defined(CONFIG_USER_ONLY)
static inline void gen_check_tlb_flush(DisasContext *ctx)
{
- TCGv_i32 t = tcg_temp_new_i32();
- TCGLabel *l = gen_new_label();
+ TCGv_i32 t;
+ TCGLabel *l;
+ if (!ctx->lazy_tlb_flush) {
+ return;
+ }
+ l = gen_new_label();
+ t = tcg_temp_new_i32();
tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
gen_helper_check_tlb_flush(cpu_env);
@@ -3475,10 +3481,14 @@ static void gen_sync(DisasContext *ctx)
uint32_t l = (ctx->opcode >> 21) & 3;
/*
- * For l == 2, it's a ptesync, We need to check for a pending TLB flush.
- * This can only happen in kernel mode however so check MSR_PR as well.
+ * We may need to check for a pending TLB flush.
+ *
+ * We do this on ptesync (l == 2) on ppc64 and any sync on ppc32.
+ *
+ * Additionally, this can only happen in kernel mode however so
+ * check MSR_PR as well.
*/
- if (l == 2 && !ctx->pr) {
+ if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) {
gen_check_tlb_flush(ctx);
}
}
@@ -11491,6 +11501,11 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
ctx.sf_mode = msr_is_64bit(env, env->msr);
ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
#endif
+ if (env->mmu_model == POWERPC_MMU_32B ||
+ env->mmu_model == POWERPC_MMU_601 ||
+ (env->mmu_model & POWERPC_MMU_64B))
+ ctx.lazy_tlb_flush = true;
+
ctx.fpu_enabled = msr_fp;
if ((env->flags & POWERPC_FLAG_SPE) && msr_spe)
ctx.spe_enabled = msr_spe;
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [Qemu-devel] [RFC/PATCH] ppc: Batch TLB flushes on 32-bit 6xx/7xx/7xxx in hash mode
2016-06-06 10:23 [Qemu-devel] [RFC/PATCH] ppc: Batch TLB flushes on 32-bit 6xx/7xx/7xxx in hash mode Benjamin Herrenschmidt
@ 2016-06-06 11:13 ` Cédric Le Goater
2016-06-06 22:36 ` Mark Cave-Ayland
1 sibling, 0 replies; 4+ messages in thread
From: Cédric Le Goater @ 2016-06-06 11:13 UTC (permalink / raw)
To: Benjamin Herrenschmidt, qemu-ppc
Cc: qemu-devel, David Gibson, Mark Cave-Ayland
On 06/06/2016 12:23 PM, Benjamin Herrenschmidt wrote:
> This ports the existing 64-bit mechanism to 32-bit, thus series
> of 64 tlbie's followed by a sync like some versions of Darwin
> (ab)use will result in a single flush.
>
> We apply a pending flush on any sync instruction though, as Darwin
> doesn't use tlbsync on non-SMP systems.
Yes, this is the case at the very beginning of boot, but it does use
tlbsync afterwards, in hw_rem_map(), where pvr is only tested against 603.
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>
> Note: I haven't done any performance impact measurements with this
> one ... feel free to let me know what it does for you :-)
It shaves a couple of seconds off a ~47s boot time on my
ThinkPad, so a 2-5% improvement I would say, but I haven't done much more perf testing.
Thanks,
C.
> target-ppc/cpu.h | 2 +-
> target-ppc/helper_regs.h | 2 +-
> target-ppc/mmu_helper.c | 44 ++++++++------------------------------------
> target-ppc/translate.c | 27 +++++++++++++++++++++------
> 4 files changed, 31 insertions(+), 44 deletions(-)
>
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index d8f8f7e..c2962d7 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -959,7 +959,6 @@ struct CPUPPCState {
> ppc_slb_t slb[MAX_SLB_ENTRIES];
> int32_t slb_nr;
> /* tcg TLB needs flush (deferred slb inval instruction typically) */
> - uint32_t tlb_need_flush;
> #endif
> /* segment registers */
> hwaddr htab_base;
> @@ -985,6 +984,7 @@ struct CPUPPCState {
> target_ulong pb[4];
> bool tlb_dirty; /* Set to non-zero when modifying TLB */
> bool kvm_sw_tlb; /* non-zero if KVM SW TLB API is active */
> + uint32_t tlb_need_flush; /* Delayed flush needed */
> #endif
>
> /* Other registers */
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 104b690..8fc0934 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -151,7 +151,7 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
> return excp;
> }
>
> -#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +#if !defined(CONFIG_USER_ONLY)
> static inline void check_tlb_flush(CPUPPCState *env)
> {
> CPUState *cs = CPU(ppc_env_get_cpu(env));
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index a5e3878..485d5b8 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -1935,8 +1935,8 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
> case POWERPC_MMU_2_06a:
> case POWERPC_MMU_2_07:
> case POWERPC_MMU_2_07a:
> - env->tlb_need_flush = 0;
> #endif /* defined(TARGET_PPC64) */
> + env->tlb_need_flush = 0;
> tlb_flush(CPU(cpu), 1);
> break;
> default:
> @@ -1949,9 +1949,6 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
> void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
> {
> #if !defined(FLUSH_ALL_TLBS)
> - PowerPCCPU *cpu = ppc_env_get_cpu(env);
> - CPUState *cs;
> -
> addr &= TARGET_PAGE_MASK;
> switch (env->mmu_model) {
> case POWERPC_MMU_SOFT_6xx:
> @@ -1963,36 +1960,12 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
> break;
> case POWERPC_MMU_32B:
> case POWERPC_MMU_601:
> - /* tlbie invalidate TLBs for all segments */
> - addr &= ~((target_ulong)-1ULL << 28);
> - cs = CPU(cpu);
> - /* XXX: this case should be optimized,
> - * giving a mask to tlb_flush_page
> - */
> - /* This is broken, some CPUs invalidate a whole congruence
> - * class on an even smaller subset of bits and some OSes take
> - * advantage of this. Just blow the whole thing away.
> + /* Actual CPUs invalidate entire congruence classes based on the
> + * geometry of their TLBs and some OSes take that into account,
> + * we just mark the TLB to be flushed later (context synchronizing
> + * event or sync instruction on 32-bit).
> */
> -#if 0
> - tlb_flush_page(cs, addr | (0x0 << 28));
> - tlb_flush_page(cs, addr | (0x1 << 28));
> - tlb_flush_page(cs, addr | (0x2 << 28));
> - tlb_flush_page(cs, addr | (0x3 << 28));
> - tlb_flush_page(cs, addr | (0x4 << 28));
> - tlb_flush_page(cs, addr | (0x5 << 28));
> - tlb_flush_page(cs, addr | (0x6 << 28));
> - tlb_flush_page(cs, addr | (0x7 << 28));
> - tlb_flush_page(cs, addr | (0x8 << 28));
> - tlb_flush_page(cs, addr | (0x9 << 28));
> - tlb_flush_page(cs, addr | (0xA << 28));
> - tlb_flush_page(cs, addr | (0xB << 28));
> - tlb_flush_page(cs, addr | (0xC << 28));
> - tlb_flush_page(cs, addr | (0xD << 28));
> - tlb_flush_page(cs, addr | (0xE << 28));
> - tlb_flush_page(cs, addr | (0xF << 28));
> -#else
> - tlb_flush(cs, 1);
> -#endif
> + env->tlb_need_flush = 1;
> break;
> #if defined(TARGET_PPC64)
> case POWERPC_MMU_64B:
> @@ -2058,13 +2031,12 @@ target_ulong helper_load_sr(CPUPPCState *env, target_ulong sr_num)
>
> void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value)
> {
> - PowerPCCPU *cpu = ppc_env_get_cpu(env);
> -
> qemu_log_mask(CPU_LOG_MMU,
> "%s: reg=%d " TARGET_FMT_lx " " TARGET_FMT_lx "\n", __func__,
> (int)srnum, value, env->sr[srnum]);
> #if defined(TARGET_PPC64)
> if (env->mmu_model & POWERPC_MMU_64) {
> + PowerPCCPU *cpu = ppc_env_get_cpu(env);
> uint64_t esid, vsid;
>
> /* ESID = srnum */
> @@ -2093,7 +2065,7 @@ void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value)
> }
> }
> #else
> - tlb_flush(CPU(cpu), 1);
> + env->tlb_need_flush = 1;
> #endif
> }
> }
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 7763431..ab5862f 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -193,6 +193,7 @@ struct DisasContext {
> uint32_t exception;
> /* Routine used to access memory */
> bool pr, hv;
> + bool lazy_tlb_flush;
> int mem_idx;
> int access_type;
> /* Translation flags */
> @@ -3290,12 +3291,17 @@ static void gen_eieio(DisasContext *ctx)
> {
> }
>
> -#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +#if !defined(CONFIG_USER_ONLY)
> static inline void gen_check_tlb_flush(DisasContext *ctx)
> {
> - TCGv_i32 t = tcg_temp_new_i32();
> - TCGLabel *l = gen_new_label();
> + TCGv_i32 t;
> + TCGLabel *l;
>
> + if (!ctx->lazy_tlb_flush) {
> + return;
> + }
> + l = gen_new_label();
> + t = tcg_temp_new_i32();
> tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
> tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
> gen_helper_check_tlb_flush(cpu_env);
> @@ -3475,10 +3481,14 @@ static void gen_sync(DisasContext *ctx)
> uint32_t l = (ctx->opcode >> 21) & 3;
>
> /*
> - * For l == 2, it's a ptesync, We need to check for a pending TLB flush.
> - * This can only happen in kernel mode however so check MSR_PR as well.
> + * We may need to check for a pending TLB flush.
> + *
> + * We do this on ptesync (l == 2) on ppc64 and any sync pn ppc32.
> + *
> + * Additionally, this can only happen in kernel mode however so
> + * check MSR_PR as well.
> */
> - if (l == 2 && !ctx->pr) {
> + if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) {
> gen_check_tlb_flush(ctx);
> }
> }
> @@ -11491,6 +11501,11 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
> ctx.sf_mode = msr_is_64bit(env, env->msr);
> ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
> #endif
> + if (env->mmu_model == POWERPC_MMU_32B ||
> + env->mmu_model == POWERPC_MMU_601 ||
> + (env->mmu_model & POWERPC_MMU_64B))
> + ctx.lazy_tlb_flush = true;
> +
> ctx.fpu_enabled = msr_fp;
> if ((env->flags & POWERPC_FLAG_SPE) && msr_spe)
> ctx.spe_enabled = msr_spe;
>
>
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Qemu-devel] [RFC/PATCH] ppc: Batch TLB flushes on 32-bit 6xx/7xx/7xxx in hash mode
2016-06-06 10:23 [Qemu-devel] [RFC/PATCH] ppc: Batch TLB flushes on 32-bit 6xx/7xx/7xxx in hash mode Benjamin Herrenschmidt
2016-06-06 11:13 ` Cédric Le Goater
@ 2016-06-06 22:36 ` Mark Cave-Ayland
2016-06-06 22:49 ` Benjamin Herrenschmidt
1 sibling, 1 reply; 4+ messages in thread
From: Mark Cave-Ayland @ 2016-06-06 22:36 UTC (permalink / raw)
To: Benjamin Herrenschmidt, qemu-ppc
Cc: Cédric Le Goater, qemu-devel, David Gibson
On 06/06/16 11:23, Benjamin Herrenschmidt wrote:
> This ports the existing 64-bit mechanism to 32-bit, thus series
> of 64 tlbie's followed by a sync like some versions of Darwin
> (ab)use will result in a single flush.
>
> We apply a pending flush on any sync instruction though, as Darwin
> doesn't use tlbsync on non-SMP systems.
>
> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
> ---
>
> Note: I haven't done any performance impact measurements with this
> one ... feel free to let me know what it does for you :-)
>
> target-ppc/cpu.h | 2 +-
> target-ppc/helper_regs.h | 2 +-
> target-ppc/mmu_helper.c | 44 ++++++++------------------------------------
> target-ppc/translate.c | 27 +++++++++++++++++++++------
> 4 files changed, 31 insertions(+), 44 deletions(-)
>
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index d8f8f7e..c2962d7 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -959,7 +959,6 @@ struct CPUPPCState {
> ppc_slb_t slb[MAX_SLB_ENTRIES];
> int32_t slb_nr;
> /* tcg TLB needs flush (deferred slb inval instruction typically) */
> - uint32_t tlb_need_flush;
> #endif
> /* segment registers */
> hwaddr htab_base;
> @@ -985,6 +984,7 @@ struct CPUPPCState {
> target_ulong pb[4];
> bool tlb_dirty; /* Set to non-zero when modifying TLB */
> bool kvm_sw_tlb; /* non-zero if KVM SW TLB API is active */
> + uint32_t tlb_need_flush; /* Delayed flush needed */
> #endif
>
> /* Other registers */
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 104b690..8fc0934 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -151,7 +151,7 @@ static inline int hreg_store_msr(CPUPPCState *env, target_ulong value,
> return excp;
> }
>
> -#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +#if !defined(CONFIG_USER_ONLY)
> static inline void check_tlb_flush(CPUPPCState *env)
> {
> CPUState *cs = CPU(ppc_env_get_cpu(env));
> diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
> index a5e3878..485d5b8 100644
> --- a/target-ppc/mmu_helper.c
> +++ b/target-ppc/mmu_helper.c
> @@ -1935,8 +1935,8 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
> case POWERPC_MMU_2_06a:
> case POWERPC_MMU_2_07:
> case POWERPC_MMU_2_07a:
> - env->tlb_need_flush = 0;
> #endif /* defined(TARGET_PPC64) */
> + env->tlb_need_flush = 0;
> tlb_flush(CPU(cpu), 1);
> break;
> default:
> @@ -1949,9 +1949,6 @@ void ppc_tlb_invalidate_all(CPUPPCState *env)
> void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
> {
> #if !defined(FLUSH_ALL_TLBS)
> - PowerPCCPU *cpu = ppc_env_get_cpu(env);
> - CPUState *cs;
> -
> addr &= TARGET_PAGE_MASK;
> switch (env->mmu_model) {
> case POWERPC_MMU_SOFT_6xx:
> @@ -1963,36 +1960,12 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, target_ulong addr)
> break;
> case POWERPC_MMU_32B:
> case POWERPC_MMU_601:
> - /* tlbie invalidate TLBs for all segments */
> - addr &= ~((target_ulong)-1ULL << 28);
> - cs = CPU(cpu);
> - /* XXX: this case should be optimized,
> - * giving a mask to tlb_flush_page
> - */
> - /* This is broken, some CPUs invalidate a whole congruence
> - * class on an even smaller subset of bits and some OSes take
> - * advantage of this. Just blow the whole thing away.
> + /* Actual CPUs invalidate entire congruence classes based on the
> + * geometry of their TLBs and some OSes take that into account,
> + * we just mark the TLB to be flushed later (context synchronizing
> + * event or sync instruction on 32-bit).
> */
> -#if 0
> - tlb_flush_page(cs, addr | (0x0 << 28));
> - tlb_flush_page(cs, addr | (0x1 << 28));
> - tlb_flush_page(cs, addr | (0x2 << 28));
> - tlb_flush_page(cs, addr | (0x3 << 28));
> - tlb_flush_page(cs, addr | (0x4 << 28));
> - tlb_flush_page(cs, addr | (0x5 << 28));
> - tlb_flush_page(cs, addr | (0x6 << 28));
> - tlb_flush_page(cs, addr | (0x7 << 28));
> - tlb_flush_page(cs, addr | (0x8 << 28));
> - tlb_flush_page(cs, addr | (0x9 << 28));
> - tlb_flush_page(cs, addr | (0xA << 28));
> - tlb_flush_page(cs, addr | (0xB << 28));
> - tlb_flush_page(cs, addr | (0xC << 28));
> - tlb_flush_page(cs, addr | (0xD << 28));
> - tlb_flush_page(cs, addr | (0xE << 28));
> - tlb_flush_page(cs, addr | (0xF << 28));
> -#else
> - tlb_flush(cs, 1);
> -#endif
> + env->tlb_need_flush = 1;
> break;
> #if defined(TARGET_PPC64)
> case POWERPC_MMU_64B:
> @@ -2058,13 +2031,12 @@ target_ulong helper_load_sr(CPUPPCState *env, target_ulong sr_num)
>
> void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value)
> {
> - PowerPCCPU *cpu = ppc_env_get_cpu(env);
> -
> qemu_log_mask(CPU_LOG_MMU,
> "%s: reg=%d " TARGET_FMT_lx " " TARGET_FMT_lx "\n", __func__,
> (int)srnum, value, env->sr[srnum]);
> #if defined(TARGET_PPC64)
> if (env->mmu_model & POWERPC_MMU_64) {
> + PowerPCCPU *cpu = ppc_env_get_cpu(env);
> uint64_t esid, vsid;
>
> /* ESID = srnum */
> @@ -2093,7 +2065,7 @@ void helper_store_sr(CPUPPCState *env, target_ulong srnum, target_ulong value)
> }
> }
> #else
> - tlb_flush(CPU(cpu), 1);
> + env->tlb_need_flush = 1;
> #endif
> }
> }
> diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> index 7763431..ab5862f 100644
> --- a/target-ppc/translate.c
> +++ b/target-ppc/translate.c
> @@ -193,6 +193,7 @@ struct DisasContext {
> uint32_t exception;
> /* Routine used to access memory */
> bool pr, hv;
> + bool lazy_tlb_flush;
> int mem_idx;
> int access_type;
> /* Translation flags */
> @@ -3290,12 +3291,17 @@ static void gen_eieio(DisasContext *ctx)
> {
> }
>
> -#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64)
> +#if !defined(CONFIG_USER_ONLY)
> static inline void gen_check_tlb_flush(DisasContext *ctx)
> {
> - TCGv_i32 t = tcg_temp_new_i32();
> - TCGLabel *l = gen_new_label();
> + TCGv_i32 t;
> + TCGLabel *l;
>
> + if (!ctx->lazy_tlb_flush) {
> + return;
> + }
> + l = gen_new_label();
> + t = tcg_temp_new_i32();
> tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush));
> tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l);
> gen_helper_check_tlb_flush(cpu_env);
> @@ -3475,10 +3481,14 @@ static void gen_sync(DisasContext *ctx)
> uint32_t l = (ctx->opcode >> 21) & 3;
>
> /*
> - * For l == 2, it's a ptesync, We need to check for a pending TLB flush.
> - * This can only happen in kernel mode however so check MSR_PR as well.
> + * We may need to check for a pending TLB flush.
> + *
> + * We do this on ptesync (l == 2) on ppc64 and any sync pn ppc32.
> + *
> + * Additionally, this can only happen in kernel mode however so
> + * check MSR_PR as well.
> */
> - if (l == 2 && !ctx->pr) {
> + if (((l == 2) || !(ctx->insns_flags & PPC_64B)) && !ctx->pr) {
> gen_check_tlb_flush(ctx);
> }
> }
> @@ -11491,6 +11501,11 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
> ctx.sf_mode = msr_is_64bit(env, env->msr);
> ctx.has_cfar = !!(env->flags & POWERPC_FLAG_CFAR);
> #endif
> + if (env->mmu_model == POWERPC_MMU_32B ||
> + env->mmu_model == POWERPC_MMU_601 ||
> + (env->mmu_model & POWERPC_MMU_64B))
> + ctx.lazy_tlb_flush = true;
> +
> ctx.fpu_enabled = msr_fp;
> if ((env->flags & POWERPC_FLAG_SPE) && msr_spe)
> ctx.spe_enabled = msr_spe;
>
>
After another run of the OpenBIOS tests with this patch applied on top
of the previous 2 patches, I see no regressions introduced. Like Cédric
I don't get the feeling that the Mac machines necessarily run faster,
however the overall experience does feel smoother and more responsive.
Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
ATB,
Mark.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Qemu-devel] [RFC/PATCH] ppc: Batch TLB flushes on 32-bit 6xx/7xx/7xxx in hash mode
2016-06-06 22:36 ` Mark Cave-Ayland
@ 2016-06-06 22:49 ` Benjamin Herrenschmidt
0 siblings, 0 replies; 4+ messages in thread
From: Benjamin Herrenschmidt @ 2016-06-06 22:49 UTC (permalink / raw)
To: Mark Cave-Ayland, qemu-ppc
Cc: Cédric Le Goater, qemu-devel, David Gibson
On Mon, 2016-06-06 at 23:36 +0100, Mark Cave-Ayland wrote:
>
> After another run of the OpenBIOS tests with this patch applied on top
> of the previous 2 patches, I see no regressions introduced. Like Cédric
> I don't get the feeling that the Mac machines necessarily run faster,
> however the overall experience does feel smoother and more responsive.
>
> Tested-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
Thanks !
Cheers,
Ben.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-06-06 22:50 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2016-06-06 10:23 [Qemu-devel] [RFC/PATCH] ppc: Batch TLB flushes on 32-bit 6xx/7xx/7xxx in hash mode Benjamin Herrenschmidt
2016-06-06 11:13 ` Cédric Le Goater
2016-06-06 22:36 ` Mark Cave-Ayland
2016-06-06 22:49 ` Benjamin Herrenschmidt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).