All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Alex Bennée" <alex.bennee@linaro.org>
To: Pranith Kumar <bobby.prani@gmail.com>
Cc: qemu-devel@nongnu.org, rth@twiddle.net
Subject: Re: [Qemu-devel] [PATCH 2/2] [TEST] Collect TLB and victim TLB hit/miss stats
Date: Mon, 10 Jul 2017 11:03:01 +0100	[thread overview]
Message-ID: <87van0a9t6.fsf@linaro.org> (raw)
In-Reply-To: <20170628050003.1809-3-bobby.prani@gmail.com>


Pranith Kumar <bobby.prani@gmail.com> writes:

> I used the following patch to collect hit/miss TLB ratios for a few
> benchmarks. The results can be found here: http://imgur.com/a/gee1o
>
> Please note that these results also include boot/shutdown as the
> per-region instrumentation patch came later.
>
> Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
> ---
>  accel/tcg/cputlb.c        | 12 ++++++++++++
>  cpus.c                    | 26 ++++++++++++++++++++++++++
>  include/exec/cpu-defs.h   |  4 ++++
>  include/sysemu/cpus.h     |  2 ++
>  target/arm/helper.c       |  6 +++++-
>  tcg/i386/tcg-target.inc.c | 16 ++++++++++++++--
>  vl.c                      |  3 +++
>  7 files changed, 66 insertions(+), 3 deletions(-)
>
> diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
> index ef52a7e5e0..2ac2397431 100644
> --- a/accel/tcg/cputlb.c
> +++ b/accel/tcg/cputlb.c
> @@ -864,12 +864,19 @@ static void io_writex(CPUArchState *env, CPUIOTLBEntry *iotlbentry,
>      }
>  }
>
> +extern bool enable_instrumentation;
> +

Is there a better place for this than a static global? I was pondering
tcg_ctx but that's not really visible to the runtime. Making it part of
the TB flags might be useful for only instrumenting certain segments of
the code but I suspect I'm bike-shedding at this point.

>  /* Return true if ADDR is present in the victim tlb, and has been copied
>     back to the main tlb.  */
>  static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
>                             size_t elt_ofs, target_ulong page)
>  {
>      size_t vidx;
> +
> +    if (enable_instrumentation) {
> +        env->tlb_access_victim++;
> +    }
> +
>      for (vidx = 0; vidx < CPU_VTLB_SIZE; ++vidx) {
>          CPUTLBEntry *vtlb = &env->tlb_v_table[mmu_idx][vidx];
>          target_ulong cmp = *(target_ulong *)((uintptr_t)vtlb + elt_ofs);
> @@ -885,6 +892,11 @@ static bool victim_tlb_hit(CPUArchState *env, size_t mmu_idx, size_t index,
>              CPUIOTLBEntry tmpio, *io = &env->iotlb[mmu_idx][index];
>              CPUIOTLBEntry *vio = &env->iotlb_v[mmu_idx][vidx];
>              tmpio = *io; *io = *vio; *vio = tmpio;
> +
> +            if (enable_instrumentation) {
> +                env->tlb_access_victim_hit++;
> +            }
> +
>              return true;
>          }
>      }
> diff --git a/cpus.c b/cpus.c
> index 14bb8d552e..14669b3469 100644
> --- a/cpus.c
> +++ b/cpus.c
> @@ -1602,6 +1602,32 @@ static bool all_vcpus_paused(void)
>      return true;
>  }
>
> +void print_tlb_stats(void)
> +{
> +    CPUState *cpu;
> +    CPU_FOREACH(cpu) {
> +        CPUArchState *cs = cpu->env_ptr;
> +
> +        fprintf(stderr, "TLB accesses %lu, hits %lu, victim accesses %lu, hits %lu\n",
> +                cs->tlb_access_total, cs->tlb_access_hit, cs->tlb_access_victim,
> +                cs->tlb_access_victim_hit);
> +    }
> +}
> +
> +void clear_tlb_stats(void)
> +{
> +    CPUState *cpu;
> +    CPU_FOREACH(cpu) {
> +        CPUArchState *cs = cpu->env_ptr;
> +
> +        cs->tlb_access_total        = 0;
> +        cs->tlb_access_hit          = 0;
> +        cs->tlb_access_victim       = 0;
> +        cs->tlb_access_victim       = 0;

Duplicate line here.

> +        cs->tlb_access_victim_hit   = 0;
> +    }
> +}
> +
>  void pause_all_vcpus(void)
>  {
>      CPUState *cpu;
> diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
> index 5f4e303635..29b3c2ada8 100644
> --- a/include/exec/cpu-defs.h
> +++ b/include/exec/cpu-defs.h
> @@ -138,6 +138,10 @@ typedef struct CPUIOTLBEntry {
>      target_ulong tlb_flush_addr;                                        \
>      target_ulong tlb_flush_mask;                                        \
>      target_ulong vtlb_index;                                            \
> +    target_ulong tlb_access_hit;                                        \
> +    target_ulong tlb_access_total;                                      \
> +    target_ulong tlb_access_victim;                                     \
> +    target_ulong tlb_access_victim_hit;                                 \
>
>  #else
>
> diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
> index 731756d948..7d8d92646c 100644
> --- a/include/sysemu/cpus.h
> +++ b/include/sysemu/cpus.h
> @@ -10,6 +10,8 @@ void resume_all_vcpus(void);
>  void pause_all_vcpus(void);
>  void cpu_stop_current(void);
>  void cpu_ticks_init(void);
> +void print_tlb_stats(void);
> +void clear_tlb_stats(void);
>
>  void configure_icount(QemuOpts *opts, Error **errp);
>  extern int use_icount;
> diff --git a/target/arm/helper.c b/target/arm/helper.c
> index dfbf03676c..d2e75b0f20 100644
> --- a/target/arm/helper.c
> +++ b/target/arm/helper.c
> @@ -1124,7 +1124,9 @@ static uint64_t pmxevtyper_read(CPUARMState *env, const ARMCPRegInfo *ri)
>      }
>  }
>
> -bool enable_instrumentation;
> +extern bool enable_instrumentation;
> +extern void print_tlb_stats(void);
> +extern void clear_tlb_stats(void);
>
>  static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri,
>                              uint64_t value)
> @@ -1139,6 +1141,8 @@ static void pmuserenr_write(CPUARMState *env, const ARMCPRegInfo *ri,
>      } else if (value == 0xfa11dead) {
>          printf("Disabling instrumentation\n");
>          enable_instrumentation = false;
> +        print_tlb_stats();
> +        clear_tlb_stats();
>          tb_flush(cs);
>      }

This needs to be part of the cputlb API so only one call is made from
the architecture helpers. I would expect this patch to be the first and
the pmuserenr_el0 (or whatever else) to be a per-arch enhancement patch
on top.

>
> diff --git a/tcg/i386/tcg-target.inc.c b/tcg/i386/tcg-target.inc.c
> index 9d7d25c017..b75bd54c35 100644
> --- a/tcg/i386/tcg-target.inc.c
> +++ b/tcg/i386/tcg-target.inc.c
> @@ -1250,6 +1250,8 @@ static void * const qemu_st_helpers[16] = {
>      [MO_BEQ]  = helper_be_stq_mmu,
>  };
>
> +extern bool enable_instrumentation;
> +
>  /* Perform the TLB load and compare.
>
>     Inputs:
> @@ -1300,6 +1302,12 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
>          }
>      }
>
> +    if (enable_instrumentation) {

Certainly inside the code generation I'd see this being controlled by
TCGContext, e.g. s->tlb_instruction

> +        tcg_out_ld(s, TCG_TYPE_I64, r0, TCG_AREG0, offsetof(CPUArchState, tlb_access_total));
> +        tcg_out_addi(s, r0, 1);
> +        tcg_out_st(s, TCG_TYPE_I64, r0, TCG_AREG0, offsetof(CPUArchState, tlb_access_total));
> +    }
> +
>      tcg_out_mov(s, tlbtype, r0, addrlo);
>      tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
>
> @@ -1348,11 +1356,15 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
>          s->code_ptr += 4;
>      }
>
> -    /* TLB Hit.  */
> -

why drop this comment?

>      /* add addend(r0), r1 */
>      tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, r1, r0,
>                           offsetof(CPUTLBEntry, addend) - which);
> +
> +    if (enable_instrumentation) {
> +        tcg_out_ld(s, TCG_TYPE_I64, r0, TCG_AREG0, offsetof(CPUArchState, tlb_access_hit));
> +        tcg_out_addi(s, r0, 1);
> +        tcg_out_st(s, TCG_TYPE_I64, r0, TCG_AREG0, offsetof(CPUArchState, tlb_access_hit));
> +    }
>  }
>
>  /*
> diff --git a/vl.c b/vl.c
> index 59fea15488..7fa392c79e 100644
> --- a/vl.c
> +++ b/vl.c
> @@ -192,6 +192,8 @@ int only_migratable; /* turn it off unless user states otherwise */
>
>  int icount_align_option;
>
> +bool enable_instrumentation;
> +
>  /* The bytes in qemu_uuid are in the order specified by RFC4122, _not_ in the
>   * little-endian "wire format" described in the SMBIOS 2.6 specification.
>   */
> @@ -4761,5 +4763,6 @@ int main(int argc, char **argv, char **envp)
>      qemu_chr_cleanup();
>      /* TODO: unref root container, check all devices are ok */
>
> +    print_tlb_stats();
>      return 0;
>  }

I appreciate this is currently test code for gathering numbers but it
would be nice to see if there is a nice way to integrate it upstream
(maybe for --enable-debug-tcg builds).

--
Alex Bennée

  reply	other threads:[~2017-07-10 10:03 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-28  5:00 [Qemu-devel] [TEST PATCH 0/2] Instrumentation and TLB stats Pranith Kumar
2017-06-28  5:00 ` [Qemu-devel] [PATCH 1/2] [TEST] aarch64: Use pmuserenr_el0 register for instrumentation Pranith Kumar
2017-06-28  9:27   ` Peter Maydell
2017-06-28  5:00 ` [Qemu-devel] [PATCH 2/2] [TEST] Collect TLB and victim TLB hit/miss stats Pranith Kumar
2017-07-10 10:03   ` Alex Bennée [this message]
2017-07-10 12:24     ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=87van0a9t6.fsf@linaro.org \
    --to=alex.bennee@linaro.org \
    --cc=bobby.prani@gmail.com \
    --cc=qemu-devel@nongnu.org \
    --cc=rth@twiddle.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.