From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:53500) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1b7XkQ-0003Lt-0C for qemu-devel@nongnu.org; Mon, 30 May 2016 20:41:32 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1b7XkL-0002rM-Su for qemu-devel@nongnu.org; Mon, 30 May 2016 20:41:29 -0400 From: David Gibson Date: Tue, 31 May 2016 10:41:08 +1000 Message-Id: <1464655277-14748-4-git-send-email-david@gibson.dropbear.id.au> In-Reply-To: <1464655277-14748-1-git-send-email-david@gibson.dropbear.id.au> References: <1464655277-14748-1-git-send-email-david@gibson.dropbear.id.au> MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Subject: [Qemu-devel] [PULL 03/12] ppc: Do some batching of TCG tlb flushes List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: peter.maydell@linaro.org Cc: agraf@suse.de, pbonzini@redhat.com, bharata.rao@gmail.com, benh@kernel.crashing.org, qemu-ppc@nongnu.org, qemu-devel@nongnu.org, =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= , David Gibson From: Benjamin Herrenschmidt On ppc64 especially, we flush the tlb on any slbie or tlbie instruction. However, those instructions often come in bursts of 3 or more (context switch will favor a series of slbie's for example to an slbia if the SLB has less than a certain number of entries in it, and tlbie's can happen in a series, with PAPR, H_BULK_REMOVE can remove up to 4 entries at a time. Doing a tlb_flush() each time is a waste of time. We end up doing a memse= t of the whole TLB, reloading it for the next instruction, memset'ing again= , etc... Those instructions don't have to take effect immediately. For slbie, they can wait for the next context synchronizing event. For tlbie, the next tlbsync. This implements batching by keeping a flag that indicates that we have a TLB in need of flushing. We check it on interrupts, rfi's, isync's and tlbsync and flush the TLB if needed. This reduces the number of tlb_flush() on a boot to a ubuntu installer first dialog screen from roughly 360K down to 36K. Signed-off-by: Benjamin Herrenschmidt [clg: added a 'CPUPPCState *' variable in h_remove() and h_bulk_remove() ] Signed-off-by: C=C3=A9dric Le Goater [dwg: removed spurious whitespace change, use 0/1 not true/false consistently, since tlb_need_flush has int type] Signed-off-by: David Gibson --- hw/ppc/spapr_hcall.c | 14 +++++++++++--- target-ppc/cpu.h | 2 ++ target-ppc/excp_helper.c | 8 ++++++++ target-ppc/helper.h | 1 + target-ppc/helper_regs.h | 13 +++++++++++++ target-ppc/mmu-hash64.c | 11 +++-------- target-ppc/mmu_helper.c | 9 ++++++++- target-ppc/translate.c | 39 ++++++++++++++++++++++++++++++++++++--- 8 files changed, 82 insertions(+), 15 deletions(-) diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c index feb3629..9a3f4ec 100644 --- a/hw/ppc/spapr_hcall.c +++ b/hw/ppc/spapr_hcall.c @@ -186,6 +186,7 @@ static RemoveResult remove_hpte(PowerPCCPU *cpu, targ= et_ulong ptex, static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMachineState *spapr, target_ulong opcode, target_ulong *args) { + CPUPPCState *env =3D &cpu->env; target_ulong flags =3D args[0]; target_ulong pte_index =3D args[1]; target_ulong avpn =3D args[2]; @@ -196,6 +197,7 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMa= chineState *spapr, =20 switch (ret) { case REMOVE_SUCCESS: + check_tlb_flush(env); return H_SUCCESS; =20 case REMOVE_NOT_FOUND: @@ -232,7 +234,9 @@ static target_ulong h_remove(PowerPCCPU *cpu, sPAPRMa= chineState *spapr, static target_ulong h_bulk_remove(PowerPCCPU *cpu, sPAPRMachineState *sp= apr, target_ulong opcode, target_ulong *arg= s) { + CPUPPCState *env =3D &cpu->env; int i; + target_ulong rc =3D H_SUCCESS; =20 for (i =3D 0; i < H_BULK_REMOVE_MAX_BATCH; i++) { target_ulong *tsh =3D &args[i*2]; @@ -265,14 +269,18 @@ static target_ulong h_bulk_remove(PowerPCCPU *cpu, = sPAPRMachineState *spapr, break; =20 case REMOVE_PARM: - return H_PARAMETER; + rc =3D H_PARAMETER; + goto exit; =20 case REMOVE_HW: - return H_HARDWARE; + rc =3D H_HARDWARE; + goto exit; } } + exit: + check_tlb_flush(env); =20 - return H_SUCCESS; + return rc; } =20 static target_ulong h_protect(PowerPCCPU *cpu, sPAPRMachineState *spapr, diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 2c8c8c0..98a24a5 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -958,6 +958,8 @@ struct CPUPPCState { /* PowerPC 64 SLB area */ ppc_slb_t slb[MAX_SLB_ENTRIES]; int32_t slb_nr; + /* tcg TLB needs flush (deferred slb inval instruction typically) */ + uint32_t tlb_need_flush; #endif /* segment registers */ hwaddr htab_base; diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c index ba3caec..a37009e 100644 --- a/target-ppc/excp_helper.c +++ b/target-ppc/excp_helper.c @@ -718,6 +718,11 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int= excp_model, int excp) /* Reset exception state */ cs->exception_index =3D POWERPC_EXCP_NONE; env->error_code =3D 0; + + /* Any interrupt is context synchronizing, check if TCG TLB + * needs a delayed flush on ppc64 + */ + check_tlb_flush(env); } =20 void ppc_cpu_do_interrupt(CPUState *cs) @@ -943,6 +948,9 @@ static inline void do_rfi(CPUPPCState *env, target_ul= ong nip, target_ulong msr, * as rfi is always the last insn of a TB */ cs->interrupt_request |=3D CPU_INTERRUPT_EXITTB; + + /* Context synchronizing: check if TCG TLB needs flush */ + check_tlb_flush(env); } =20 void helper_rfi(CPUPPCState *env) diff --git a/target-ppc/helper.h b/target-ppc/helper.h index e5a8f7b..0526322 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -16,6 +16,7 @@ DEF_HELPER_1(rfmci, void, env) DEF_HELPER_1(rfid, void, env) DEF_HELPER_1(hrfid, void, env) #endif +DEF_HELPER_1(check_tlb_flush, void, env) #endif =20 DEF_HELPER_3(lmw, void, env, tl, i32) diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h index f7edd5b..57da931 100644 --- a/target-ppc/helper_regs.h +++ b/target-ppc/helper_regs.h @@ -151,4 +151,17 @@ static inline int hreg_store_msr(CPUPPCState *env, t= arget_ulong value, return excp; } =20 +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) +static inline void check_tlb_flush(CPUPPCState *env) +{ + CPUState *cs =3D CPU(ppc_env_get_cpu(env)); + if (env->tlb_need_flush) { + env->tlb_need_flush =3D 0; + tlb_flush(cs, 1); + } +} +#else +static inline void check_tlb_flush(CPUPPCState *env) { } +#endif + #endif /* !defined(__HELPER_REGS_H__) */ diff --git a/target-ppc/mmu-hash64.c b/target-ppc/mmu-hash64.c index 17e2480..ea6e99a 100644 --- a/target-ppc/mmu-hash64.c +++ b/target-ppc/mmu-hash64.c @@ -99,10 +99,8 @@ void dump_slb(FILE *f, fprintf_function cpu_fprintf, P= owerPCCPU *cpu) =20 void helper_slbia(CPUPPCState *env) { - PowerPCCPU *cpu =3D ppc_env_get_cpu(env); - int n, do_invalidate; + int n; =20 - do_invalidate =3D 0; /* XXX: Warning: slbia never invalidates the first segment */ for (n =3D 1; n < env->slb_nr; n++) { ppc_slb_t *slb =3D &env->slb[n]; @@ -113,12 +111,9 @@ void helper_slbia(CPUPPCState *env) * and we still don't have a tlb_flush_mask(env, n, mas= k) * in QEMU, we just invalidate all TLBs */ - do_invalidate =3D 1; + env->tlb_need_flush =3D 1; } } - if (do_invalidate) { - tlb_flush(CPU(cpu), 1); - } } =20 void helper_slbie(CPUPPCState *env, target_ulong addr) @@ -138,7 +133,7 @@ void helper_slbie(CPUPPCState *env, target_ulong addr= ) * and we still don't have a tlb_flush_mask(env, n, mask) * in QEMU, we just invalidate all TLBs */ - tlb_flush(CPU(cpu), 1); + env->tlb_need_flush =3D 1; } } =20 diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c index 2e0e3ca..1499af72 100644 --- a/target-ppc/mmu_helper.c +++ b/target-ppc/mmu_helper.c @@ -27,6 +27,7 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/log.h" +#include "helper_regs.h" =20 //#define DEBUG_MMU //#define DEBUG_BATS @@ -1924,6 +1925,7 @@ void ppc_tlb_invalidate_all(CPUPPCState *env) case POWERPC_MMU_2_06a: case POWERPC_MMU_2_07: case POWERPC_MMU_2_07a: + env->tlb_need_flush =3D 0; #endif /* defined(TARGET_PPC64) */ tlb_flush(CPU(cpu), 1); break; @@ -1986,7 +1988,7 @@ void ppc_tlb_invalidate_one(CPUPPCState *env, targe= t_ulong addr) * and we still don't have a tlb_flush_mask(env, n, mask) i= n QEMU, * we just invalidate all TLBs */ - tlb_flush(CPU(cpu), 1); + env->tlb_need_flush =3D 1; break; #endif /* defined(TARGET_PPC64) */ default: @@ -2875,6 +2877,11 @@ void helper_booke206_tlbflush(CPUPPCState *env, ta= rget_ulong type) } =20 =20 +void helper_check_tlb_flush(CPUPPCState *env) +{ + check_tlb_flush(env); +} + /***********************************************************************= ******/ =20 /* try to fill the TLB and return an exception if error. If retaddr is diff --git a/target-ppc/translate.c b/target-ppc/translate.c index b757634..dfd3010 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -3275,9 +3275,32 @@ static void gen_eieio(DisasContext *ctx) { } =20 +#if !defined(CONFIG_USER_ONLY) && defined(TARGET_PPC64) +static inline void gen_check_tlb_flush(DisasContext *ctx) +{ + TCGv_i32 t =3D tcg_temp_new_i32(); + TCGLabel *l =3D gen_new_label(); + + tcg_gen_ld_i32(t, cpu_env, offsetof(CPUPPCState, tlb_need_flush)); + tcg_gen_brcondi_i32(TCG_COND_EQ, t, 0, l); + gen_helper_check_tlb_flush(cpu_env); + gen_set_label(l); + tcg_temp_free_i32(t); +} +#else +static inline void gen_check_tlb_flush(DisasContext *ctx) { } +#endif + /* isync */ static void gen_isync(DisasContext *ctx) { + /* + * We need to check for a pending TLB flush. This can only happen in + * kernel mode however so check MSR_PR + */ + if (!ctx->pr) { + gen_check_tlb_flush(ctx); + } gen_stop_exception(ctx); } =20 @@ -3434,6 +3457,15 @@ STCX(stqcx_, 16); /* sync */ static void gen_sync(DisasContext *ctx) { + uint32_t l =3D (ctx->opcode >> 21) & 3; + + /* + * For l =3D=3D 2, it's a ptesync, We need to check for a pending TL= B flush. + * This can only happen in kernel mode however so check MSR_PR as we= ll. + */ + if (l =3D=3D 2 && !ctx->pr) { + gen_check_tlb_flush(ctx); + } } =20 /* wait */ @@ -4851,10 +4883,11 @@ static void gen_tlbsync(DisasContext *ctx) gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC); return; } - /* This has no effect: it should ensure that all previous - * tlbie have completed + /* tlbsync is a nop for server, ptesync handles delayed tlb flush, + * embedded however needs to deal with tlbsync. We don't try to be + * fancy and swallow the overhead of checking for both. */ - gen_stop_exception(ctx); + gen_check_tlb_flush(ctx); #endif } =20 --=20 2.5.5