From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:35918) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1X71dv-0002Ls-8Y for qemu-devel@nongnu.org; Tue, 15 Jul 2014 08:15:42 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1X71df-0000Ow-SR for qemu-devel@nongnu.org; Tue, 15 Jul 2014 08:15:34 -0400 Received: from cantor2.suse.de ([195.135.220.15]:60502 helo=mx2.suse.de) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1X71df-0000OF-Gz for qemu-devel@nongnu.org; Tue, 15 Jul 2014 08:15:19 -0400 Message-ID: <53C51B56.6010509@suse.de> Date: Tue, 15 Jul 2014 14:15:18 +0200 From: =?UTF-8?B?QW5kcmVhcyBGw6RyYmVy?= MIME-Version: 1.0 References: <1405424541-21803-1-git-send-email-alex.bennee@linaro.org> <1405424541-21803-4-git-send-email-alex.bennee@linaro.org> In-Reply-To: <1405424541-21803-4-git-send-email-alex.bennee@linaro.org> Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: quoted-printable Subject: Re: [Qemu-devel] [PATCH v2 3/3] trace: instrument and trace tcg tb flush activity List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: =?UTF-8?B?QWxleCBCZW5uw6ll?= Cc: qemu-devel@nongnu.org, stefanha@redhat.com, mohamad.gebai@gmail.com Hi, Am 15.07.2014 13:42, schrieb Alex Benn=C3=A9e: > The tb_find_fast path is important to quickly moving from one block to > the next. However we need to flush it when tlb changes occur so it's > important to know how well we are doing with the cache. >=20 > This patch adds some basic hit/miss profiling to the tb_find_fast > tracepoint as well as a number of other tb_ related areas. I've also > added a trace_inc_counter() helper which gets inlined away when tracing > is disabled. >=20 > Signed-off-by: Alex Benn=C3=A9e >=20 > diff --git a/cpu-exec.c b/cpu-exec.c > index 45ef77b..771272f 100644 > --- a/cpu-exec.c > +++ b/cpu-exec.c > @@ -187,7 +187,10 @@ static inline TranslationBlock *tb_find_fast(CPUAr= chState *env) > tb =3D cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; > if (unlikely(!tb || tb->pc !=3D pc || tb->cs_base !=3D cs_base || > tb->flags !=3D flags)) { > + trace_inc_counter(&cpu->tb_jmp_cache_stats.misses); > tb =3D tb_find_slow(env, pc, cs_base, flags); > + } else { > + trace_inc_counter(&cpu->tb_jmp_cache_stats.hits); > } > return tb; > } > diff --git a/cputlb.c b/cputlb.c > index 7bd3573..672656a 100644 > --- a/cputlb.c > +++ b/cputlb.c > @@ -58,7 +58,7 @@ void tlb_flush(CPUState *cpu, int flush_global) > cpu->current_tb =3D NULL; > =20 > memset(env->tlb_table, -1, sizeof(env->tlb_table)); > - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); > + tb_flush_all_jmp_cache(cpu); > =20 > env->tlb_flush_addr =3D -1; > env->tlb_flush_mask =3D 0; > diff --git a/include/qom/cpu.h b/include/qom/cpu.h > index df977c8..8376678 100644 > --- a/include/qom/cpu.h > +++ b/include/qom/cpu.h > @@ -243,6 +243,10 @@ struct CPUState { > void *env_ptr; /* CPUArchState */ > struct TranslationBlock *current_tb; > struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; > + struct { > + int hits; > + int misses; Is anything else going to be added here? If not, the indentation can be dropped. > + } tb_jmp_cache_stats; This is lacking documentation. Should be trivial to add for this field (not here, above the struct). To document the subfields we may need to name the struct. > struct GDBRegisterState *gdb_regs; > int gdb_num_regs; > int gdb_num_g_regs; > @@ -584,6 +588,15 @@ void cpu_exit(CPUState *cpu); > */ > void cpu_resume(CPUState *cpu); > =20 > + > +/** > + * tb_flush_all_jmp_cache: > + * @cpu: The CPU jmp cache to flush > + * > + * Flush all the entries from the cpu fast jump cache "CPU" for consistency > + */ > +void tb_flush_all_jmp_cache(CPUState *cpu); > + > /** > * qemu_init_vcpu: > * @cpu: The vCPU to initialize. > diff --git a/include/trace.h b/include/trace.h > index c15f498..7a9c0dc 100644 > --- a/include/trace.h > +++ b/include/trace.h > @@ -3,4 +3,14 @@ > =20 > #include "trace/generated-tracers.h" > =20 > +#ifndef CONFIG_TRACE_NOP > +static inline void trace_inc_counter(int *counter) { > + int cnt =3D *counter; > + cnt++; > + *counter =3D cnt; > +} > +#else > +static inline void trace_inc_counter(int *counter) { /* do nothing */ = } > +#endif > + > #endif /* TRACE_H */ Coding Style issues with the first function. For simplicity just keep the first implementation but with the proper brace placement, and then just put the #ifdef into the function body. That avoids the signatures getting out of sync. > diff --git a/qom/cpu.c b/qom/cpu.c > index fada2d4..956b36d 100644 > --- a/qom/cpu.c > +++ b/qom/cpu.c > @@ -244,7 +244,7 @@ static void cpu_common_reset(CPUState *cpu) > cpu->icount_extra =3D 0; > cpu->icount_decr.u32 =3D 0; > cpu->can_do_io =3D 0; > - memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *)); > + tb_flush_all_jmp_cache(cpu); > } > =20 > static bool cpu_common_has_work(CPUState *cs) > diff --git a/trace-events b/trace-events > index f8cc35f..5a58a11 100644 > --- a/trace-events > +++ b/trace-events > @@ -1244,6 +1244,9 @@ exec_tb_exit(void *next_tb, unsigned int flags) "= tb:%p flags=3D%x" > =20 > # translate-all.c > translate_block(void *tb, uintptr_t pc, uint8_t *tb_code) "tb:%p, pc:0= x%x, tb_code:%p" > +tb_flush(void) "" > +tb_flush_jump_cache(uintptr_t pc) "pc:0x%x" > +tb_flush_all_jump_cache(int hits, int misses) "hits:%d misses:%d" > =20 > # memory.c > memory_region_ops_read(void *mr, uint64_t addr, uint64_t value, unsign= ed size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u" > diff --git a/translate-all.c b/translate-all.c > index a11c083..8e7bbcc 100644 > --- a/translate-all.c > +++ b/translate-all.c > @@ -714,12 +714,22 @@ static void page_flush_tb(void) > } > } > =20 > +void tb_flush_all_jmp_cache(CPUState *cpu) > +{ > + trace_tb_flush_all_jump_cache(cpu->tb_jmp_cache_stats.hits, > + cpu->tb_jmp_cache_stats.misses); > + memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); > + memset((void *) &cpu->tb_jmp_cache_stats, 0, sizeof(cpu->tb_jmp_ca= che_stats)); > +} > + > /* flush all the translation blocks */ > /* XXX: tb_flush is currently not thread safe */ > void tb_flush(CPUArchState *env1) > { > CPUState *cpu =3D ENV_GET_CPU(env1); > =20 > + trace_tb_flush(); > + > #if defined(DEBUG_FLUSH) > printf("qemu: flush code_size=3D%ld nb_tbs=3D%d avg_tb_size=3D%ld\= n", > (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buf= fer), > @@ -734,7 +744,7 @@ void tb_flush(CPUArchState *env1) > tcg_ctx.tb_ctx.nb_tbs =3D 0; > =20 > CPU_FOREACH(cpu) { > - memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); > + tb_flush_all_jmp_cache(cpu); > } > =20 > memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_ph= ys_hash)); > @@ -1520,6 +1530,8 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulo= ng addr) > i =3D tb_jmp_cache_hash_page(addr); > memset(&cpu->tb_jmp_cache[i], 0, > TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); Can this one be dropped, too? > + > + trace_tb_flush_jump_cache(addr); > } > =20 > void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) Cheers, Andreas --=20 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 N=C3=BCrnberg, Germany GF: Jeff Hawn, Jennifer Guild, Felix Imend=C3=B6rffer; HRB 16746 AG N=C3=BC= rnberg