qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Alexander Graf <agraf@suse.de>
To: Paolo Bonzini <pbonzini@redhat.com>, qemu-devel@nongnu.org
Cc: dgibson@redhat.com, qemu-ppc@nongnu.org, tommusta@gmail.com
Subject: Re: [Qemu-devel] [Qemu-ppc] [PATCH 02/17] ppc: avoid excessive TLB flushing
Date: Fri, 05 Sep 2014 09:10:01 +0200	[thread overview]
Message-ID: <540961C9.9000705@suse.de> (raw)
In-Reply-To: <1409246113-6519-3-git-send-email-pbonzini@redhat.com>



On 28.08.14 19:14, Paolo Bonzini wrote:
> PowerPC TCG flushes the TLB on every IR/DR change, which basically
> means on every user<->kernel context switch.  Use the 6-element
> TLB array as a cache, where each MMU index is mapped to a different
> state of the IR/DR/PR/HV bits.
> 
> This brings the number of TLB flushes down from ~900000 to ~50000
> for starting up the Debian installer, which is in line with x86
> and gives a ~10% performance improvement.
> 
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  cputlb.c                    | 19 +++++++++++++++++
>  hw/ppc/spapr_hcall.c        |  6 +++++-
>  include/exec/exec-all.h     |  5 +++++
>  target-ppc/cpu.h            |  4 +++-
>  target-ppc/excp_helper.c    |  6 +-----
>  target-ppc/helper_regs.h    | 52 +++++++++++++++++++++++++++++++--------------
>  target-ppc/translate_init.c |  5 +++++
>  7 files changed, 74 insertions(+), 23 deletions(-)
> 
> diff --git a/cputlb.c b/cputlb.c
> index afd3705..17e1b03 100644
> --- a/cputlb.c
> +++ b/cputlb.c
> @@ -67,6 +67,25 @@ void tlb_flush(CPUState *cpu, int flush_global)
>      tlb_flush_count++;
>  }
>  
> +void tlb_flush_idx(CPUState *cpu, int mmu_idx)
> +{
> +    CPUArchState *env = cpu->env_ptr;
> +
> +#if defined(DEBUG_TLB)
> +    printf("tlb_flush_idx %d:\n", mmu_idx);
> +#endif
> +    /* must reset current TB so that interrupts cannot modify the
> +       links while we are modifying them */
> +    cpu->current_tb = NULL;
> +
> +    memset(env->tlb_table[mmu_idx], -1, sizeof(env->tlb_table[mmu_idx]));
> +    memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
> +
> +    env->tlb_flush_addr = -1;
> +    env->tlb_flush_mask = 0;
> +    tlb_flush_count++;
> +}
> +
>  static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
>  {
>      if (addr == (tlb_entry->addr_read &
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 467858c..b95961c 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -556,13 +556,17 @@ static target_ulong h_cede(PowerPCCPU *cpu, sPAPREnvironment *spapr,
>  {
>      CPUPPCState *env = &cpu->env;
>      CPUState *cs = CPU(cpu);
> +    bool flush;
>  
>      env->msr |= (1ULL << MSR_EE);
> -    hreg_compute_hflags(env);
> +    flush = hreg_compute_hflags(env);
>      if (!cpu_has_work(cs)) {
>          cs->halted = 1;
>          cs->exception_index = EXCP_HLT;
>          cs->exit_request = 1;
> +    } else if (flush) {
> +        cs->interrupt_request |= CPU_INTERRUPT_EXITTB;
> +        cs->exit_request = 1;

Can this ever happen?

>      }
>      return H_SUCCESS;
>  }
> diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
> index 5e5d86e..629a550 100644
> --- a/include/exec/exec-all.h
> +++ b/include/exec/exec-all.h
> @@ -100,6 +100,7 @@ void tcg_cpu_address_space_init(CPUState *cpu, AddressSpace *as);
>  /* cputlb.c */
>  void tlb_flush_page(CPUState *cpu, target_ulong addr);
>  void tlb_flush(CPUState *cpu, int flush_global);
> +void tlb_flush_idx(CPUState *cpu, int mmu_idx);
>  void tlb_set_page(CPUState *cpu, target_ulong vaddr,
>                    hwaddr paddr, int prot,
>                    int mmu_idx, target_ulong size);
> @@ -112,6 +113,10 @@ static inline void tlb_flush_page(CPUState *cpu, target_ulong addr)
>  static inline void tlb_flush(CPUState *cpu, int flush_global)
>  {
>  }
> +
> +static inline void tlb_flush_idx(CPUState *cpu, int mmu_idx)
> +{
> +}
>  #endif
>  
>  #define CODE_GEN_ALIGN           16 /* must be >= of the size of a icache line */
> diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
> index b64c652..c1cb27f 100644
> --- a/target-ppc/cpu.h
> +++ b/target-ppc/cpu.h
> @@ -922,7 +922,7 @@ struct ppc_segment_page_sizes {
>  
>  /*****************************************************************************/
>  /* The whole PowerPC CPU context */
> -#define NB_MMU_MODES 3
> +#define NB_MMU_MODES 6
>  
>  #define PPC_CPU_OPCODES_LEN 0x40
>  
> @@ -1085,6 +1085,8 @@ struct CPUPPCState {
>      target_ulong hflags;      /* hflags is a MSR & HFLAGS_MASK         */
>      target_ulong hflags_nmsr; /* specific hflags, not coming from MSR */
>      int mmu_idx;         /* precomputed MMU index to speed up mem accesses */
> +    uint32_t mmu_msr[NB_MMU_MODES];  /* ir/dr/hv/pr values for TLBs */
> +    int mmu_fifo;  /* for replacement in mmu_msr */
>  
>      /* Power management */
>      int (*check_pow)(CPUPPCState *env);
> diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
> index be71590..bf25d44 100644
> --- a/target-ppc/excp_helper.c
> +++ b/target-ppc/excp_helper.c
> @@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>  
>      if (env->spr[SPR_LPCR] & LPCR_AIL) {
>          new_msr |= (1 << MSR_IR) | (1 << MSR_DR);
> -    } else if (msr & ((1 << MSR_IR) | (1 << MSR_DR))) {
> -        /* If we disactivated any translation, flush TLBs */
> -        tlb_flush(cs, 1);
>      }
>  
>  #ifdef TARGET_PPC64
> @@ -678,8 +675,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int excp_model, int excp)
>      if ((env->mmu_model == POWERPC_MMU_BOOKE) ||
>          (env->mmu_model == POWERPC_MMU_BOOKE206)) {
>          /* XXX: The BookE changes address space when switching modes,
> -                we should probably implement that as different MMU indexes,
> -                but for the moment we do it the slow way and flush all.  */
> +                TODO: still needed?!?  */
>          tlb_flush(cs, 1);
>      }
>  }
> diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
> index 271fddf..291f9c1 100644
> --- a/target-ppc/helper_regs.h
> +++ b/target-ppc/helper_regs.h
> @@ -39,17 +39,38 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
>      env->tgpr[3] = tmp;
>  }
>  
> -static inline void hreg_compute_mem_idx(CPUPPCState *env)
> +static inline bool hreg_compute_mem_idx(CPUPPCState *env)
>  {
> -    /* Precompute MMU index */
> -    if (msr_pr == 0 && msr_hv != 0) {
> -        env->mmu_idx = 2;
> -    } else {
> -        env->mmu_idx = 1 - msr_pr;
> +    CPUState *cs = CPU(ppc_env_get_cpu(env));
> +    int msr = env->msr;
> +    int i;
> +
> +    if (!tcg_enabled()) {
> +        return false;
> +    }
> +
> +    msr &= (1 << MSR_IR) | (1 << MSR_DR) | (1 << MSR_PR) | MSR_HVB;
> +    if (msr_pr == 1) {
> +        msr &= ~MSR_HVB;
>      }
> +
> +    for (i = 0; i < NB_MMU_MODES; i++) {
> +        if (env->mmu_msr[i] == msr) {
> +            env->mmu_idx = i;
> +            return false;
> +        }
> +    }
> +
> +    /* Use a new index with FIFO replacement.  */
> +    i = (env->mmu_fifo == NB_MMU_MODES - 1 ? 0 : env->mmu_fifo + 1);
> +    env->mmu_fifo = i;
> +    env->mmu_msr[i] = msr;
> +    env->mmu_idx = i;
> +    tlb_flush_idx(cs, i);
> +    return true;
>  }

Ok, so this basically changes the semantics of mmu_idx from a static
array with predefined meanings to a dynamic array with runtime changing
semantics.

The first thing that comes to mind here is why we're not just extending
the existing array? After all, we have 4 bits -> 16 states minus one for
PR+HV. Can our existing logic not deal with this?

Second thing I'm failing to grasp still is that in the previous patch
you're changing ctx.mem_idx into to different static semantics. But that
mem_idx gets passed to our ld/st helpers which again boils down to the
mem_idx above, no? So aren't we accessing random unrelated mmu contexts now?

There's a good chance I'm not fully grasping something here :).


Alex

  parent reply	other threads:[~2014-09-05  7:10 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-08-28 17:14 [Qemu-devel] [RFT/RFH PATCH 00/16] PPC speedup patches for TCG Paolo Bonzini
2014-08-28 17:14 ` [Qemu-devel] [PATCH 01/17] ppc: do not look at the MMU index Paolo Bonzini
2014-08-28 17:14 ` [Qemu-devel] [PATCH 02/17] ppc: avoid excessive TLB flushing Paolo Bonzini
2014-08-28 17:30   ` Peter Maydell
2014-08-28 19:35     ` Paolo Bonzini
2014-09-05  6:00       ` David Gibson
2014-09-05  7:10   ` Alexander Graf [this message]
2014-09-05 12:11     ` [Qemu-devel] [Qemu-ppc] " Paolo Bonzini
2014-09-09 16:42       ` Paolo Bonzini
2014-09-09 20:51         ` Alexander Graf
2014-08-28 17:14 ` [Qemu-devel] [PATCH 03/17] ppc: fix monitor access to CR Paolo Bonzini
2014-09-03 18:21   ` Tom Musta
2014-09-05  7:10     ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-08-28 17:15 ` [Qemu-devel] [PATCH 04/17] ppc: use ARRAY_SIZE in gdbstub.c Paolo Bonzini
2014-09-03 18:21   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 05/17] ppc: use CRF_* in fpu_helper.c Paolo Bonzini
2014-09-03 18:21   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 06/17] ppc: use CRF_* in int_helper.c Paolo Bonzini
2014-09-03 18:28   ` Tom Musta
2014-09-05  7:12     ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-08-28 17:15 ` [Qemu-devel] [PATCH 07/17] ppc: fix result of DLMZB when no zero bytes are found Paolo Bonzini
2014-09-03 18:28   ` Tom Musta
2014-09-05  7:26     ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-08-28 17:15 ` [Qemu-devel] [PATCH 08/17] ppc: introduce helpers for mfocrf/mtocrf Paolo Bonzini
2014-09-03 18:28   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 09/17] ppc: reorganize gen_compute_fprf Paolo Bonzini
2014-09-03 18:29   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 10/17] ppc: introduce gen_op_mfcr/gen_op_mtcr Paolo Bonzini
2014-09-03 18:58   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 11/17] ppc: rename gen_set_cr6_from_fpscr Paolo Bonzini
2014-09-03 19:41   ` Tom Musta
2014-09-05  7:27     ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-08-28 17:15 ` [Qemu-devel] [PATCH 12/17] ppc: use movcond for isel Paolo Bonzini
2014-08-29 18:30   ` Richard Henderson
2014-09-03 19:41   ` Tom Musta
2014-09-15 13:39     ` Paolo Bonzini
2014-08-28 17:15 ` [Qemu-devel] [PATCH 13/17] ppc: compute mask from BI using right shift Paolo Bonzini
2014-09-03 20:59   ` Tom Musta
2014-09-05  7:29     ` [Qemu-devel] [Qemu-ppc] " Alexander Graf
2014-08-28 17:15 ` [Qemu-devel] [PATCH 14/17] ppc: introduce ppc_get_crf and ppc_set_crf Paolo Bonzini
2014-09-04 18:26   ` Tom Musta
2014-08-28 17:15 ` [Qemu-devel] [PATCH 15/17] ppc: store CR registers in 32 1-bit registers Paolo Bonzini
2014-09-04 18:27   ` Tom Musta
2014-09-09 15:44     ` Paolo Bonzini
2014-09-09 16:41       ` Paolo Bonzini
2014-09-09 16:03     ` Richard Henderson
2014-09-09 16:26       ` Paolo Bonzini
2014-08-28 17:15 ` [Qemu-devel] [PATCH 16/17] ppc: inline ppc_get_crf/ppc_set_crf when clearer Paolo Bonzini
2014-08-28 17:15 ` [Qemu-devel] [PATCH 17/17] ppc: dump all 32 CR bits Paolo Bonzini
2014-08-28 18:05 ` [Qemu-devel] [RFT/RFH PATCH 00/16] PPC speedup patches for TCG Tom Musta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=540961C9.9000705@suse.de \
    --to=agraf@suse.de \
    --cc=dgibson@redhat.com \
    --cc=pbonzini@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=qemu-ppc@nongnu.org \
    --cc=tommusta@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).