qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Paolo Bonzini <pbonzini@redhat.com>
To: "Emilio G. Cota" <cota@braap.org>,
	qemu-devel@nongnu.org, mttcg@listserver.greensocs.com
Cc: alex.bennee@linaro.org,
	Frederic Konrad <fred.konrad@greensocs.com>,
	mark.burton@greensocs.com, a.rigo@virtualopensystems.com,
	guillaume.delbergue@greensocs.com
Subject: Re: [Qemu-devel] [RFC 26/38] cpu: protect tb_jmp_cache with seqlock
Date: Fri, 4 Sep 2015 10:50:12 +0200	[thread overview]
Message-ID: <55E95B44.3060506@redhat.com> (raw)
In-Reply-To: <1440375847-17603-27-git-send-email-cota@braap.org>



On 24/08/2015 02:23, Emilio G. Cota wrote:
> This paves the way for a lockless tb_find_fast.

Having now reviewed the patch, I think we can do better.

The idea is:

- only the CPU thread can set cpu->tb_jmp_cache[]

- other threads can, under seqlock protection, _clear_ cpu->tb_jmp_cache[]

- the seqlock can be protected by tb_lock.  Then you need not retry the
read, you can just fall back to the slow path, which will take the
tb_lock and thus serialize with the clearer.

Paolo

> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
>  cpu-exec.c        |  8 +++++++-
>  exec.c            |  2 ++
>  include/qom/cpu.h | 15 +++++++++++++++
>  qom/cpu.c         |  2 +-
>  translate-all.c   | 32 +++++++++++++++++++++++++++++++-
>  5 files changed, 56 insertions(+), 3 deletions(-)
> 
> diff --git a/cpu-exec.c b/cpu-exec.c
> index f758928..5ad578d 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -334,7 +334,9 @@ static TranslationBlock *tb_find_slow(CPUState *cpu,
>      }
>  
>      /* we add the TB in the virtual pc hash table */
> +    seqlock_write_lock(&cpu->tb_jmp_cache_sequence);
>      cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)] = tb;
> +    seqlock_write_unlock(&cpu->tb_jmp_cache_sequence);
>      return tb;
>  }
>  
> @@ -343,13 +345,17 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu)
>      CPUArchState *env = (CPUArchState *)cpu->env_ptr;
>      TranslationBlock *tb;
>      target_ulong cs_base, pc;
> +    unsigned int version;
>      int flags;
>  
>      /* we record a subset of the CPU state. It will
>         always be the same before a given translated block
>         is executed. */
>      cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
> -    tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
> +    do {
> +        version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence);
> +        tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)];
> +    } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version));
>      if (unlikely(!tb || tb->pc != pc || tb->cs_base != cs_base ||
>                   tb->flags != flags)) {
>          tb = tb_find_slow(cpu, pc, cs_base, flags);
> diff --git a/exec.c b/exec.c
> index edf2236..ae6f416 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -577,6 +577,8 @@ void cpu_exec_init(CPUState *cpu, Error **errp)
>      int cpu_index;
>      Error *local_err = NULL;
>  
> +    qemu_mutex_init(&cpu->tb_jmp_cache_lock);
> +    seqlock_init(&cpu->tb_jmp_cache_sequence, &cpu->tb_jmp_cache_lock);
>  #ifndef CONFIG_USER_ONLY
>      cpu->as = &address_space_memory;
>      cpu->thread_id = qemu_get_thread_id();
> diff --git a/include/qom/cpu.h b/include/qom/cpu.h
> index dbe0438..f383c24 100644
> --- a/include/qom/cpu.h
> +++ b/include/qom/cpu.h
> @@ -27,6 +27,7 @@
>  #include "exec/hwaddr.h"
>  #include "exec/memattrs.h"
>  #include "qemu/queue.h"
> +#include "qemu/seqlock.h"
>  #include "qemu/thread.h"
>  #include "qemu/typedefs.h"
>  
> @@ -287,6 +288,13 @@ struct CPUState {
>  
>      void *env_ptr; /* CPUArchState */
>      struct TranslationBlock *current_tb;
> +    /*
> +     * The seqlock here is needed because not all updates are to a single
> +     * entry; sometimes we want to atomically clear all entries that belong to
> +     * a given page, e.g. when flushing said page.
> +     */
> +    QemuMutex tb_jmp_cache_lock;
> +    QemuSeqLock tb_jmp_cache_sequence;
>      struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE];
>  
>      struct GDBRegisterState *gdb_regs;
> @@ -342,6 +350,13 @@ extern struct CPUTailQ cpus;
>  
>  extern __thread CPUState *current_cpu;
>  
> +static inline void cpu_tb_jmp_cache_clear(CPUState *cpu)
> +{
> +    seqlock_write_lock(&cpu->tb_jmp_cache_sequence);
> +    memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
> +    seqlock_write_unlock(&cpu->tb_jmp_cache_sequence);
> +}
> +
>  /**
>   * cpu_paging_enabled:
>   * @cpu: The CPU whose state is to be inspected.
> diff --git a/qom/cpu.c b/qom/cpu.c
> index ac19710..5e72e7a 100644
> --- a/qom/cpu.c
> +++ b/qom/cpu.c
> @@ -251,7 +251,7 @@ static void cpu_common_reset(CPUState *cpu)
>      cpu->icount_decr.u32 = 0;
>      cpu->can_do_io = 1;
>      cpu->exception_index = -1;
> -    memset(cpu->tb_jmp_cache, 0, TB_JMP_CACHE_SIZE * sizeof(void *));
> +    cpu_tb_jmp_cache_clear(cpu);
>  }
>  
>  static bool cpu_common_has_work(CPUState *cs)
> diff --git a/translate-all.c b/translate-all.c
> index 76a0be8..668b43a 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -863,7 +863,7 @@ void tb_flush(CPUState *cpu)
>      tcg_ctx.tb_ctx.nb_tbs = 0;
>  
>      CPU_FOREACH(cpu) {
> -        memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
> +        cpu_tb_jmp_cache_clear(cpu);
>      }
>  
>      memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
> @@ -988,6 +988,27 @@ static inline void tb_jmp_remove(TranslationBlock *tb, int n)
>      }
>  }
>  
> +static inline void tb_jmp_cache_entry_clear(CPUState *cpu, TranslationBlock *tb)
> +{
> +    unsigned int version;
> +    unsigned int h;
> +    bool hit = false;
> +
> +    h = tb_jmp_cache_hash_func(tb->pc);
> +    do {
> +        version = seqlock_read_begin(&cpu->tb_jmp_cache_sequence);
> +        hit = cpu->tb_jmp_cache[h] == tb;
> +    } while (seqlock_read_retry(&cpu->tb_jmp_cache_sequence, version));
> +
> +    if (hit) {
> +        seqlock_write_lock(&cpu->tb_jmp_cache_sequence);
> +        if (likely(cpu->tb_jmp_cache[h] == tb)) {
> +            cpu->tb_jmp_cache[h] = NULL;
> +        }
> +        seqlock_write_unlock(&cpu->tb_jmp_cache_sequence);
> +    }
> +}
> +
>  /* invalidate one TB
>   *
>   * Called with tb_lock held.
> @@ -1024,6 +1045,13 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
>          invalidate_page_bitmap(p);
>      }
>  
> +    tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
> +
> +    /* remove the TB from the hash list */
> +    CPU_FOREACH(cpu) {
> +        tb_jmp_cache_entry_clear(cpu, tb);
> +    }
> +
>      /* suppress this TB from the two jump lists */
>      tb_jmp_remove(tb, 0);
>      tb_jmp_remove(tb, 1);
> @@ -1707,12 +1735,14 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
>      /* Discard jump cache entries for any tb which might potentially
>         overlap the flushed page.  */
>      i = tb_jmp_cache_hash_page(addr - TARGET_PAGE_SIZE);
> +    seqlock_write_lock(&cpu->tb_jmp_cache_sequence);
>      memset(&cpu->tb_jmp_cache[i], 0,
>             TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
>  
>      i = tb_jmp_cache_hash_page(addr);
>      memset(&cpu->tb_jmp_cache[i], 0,
>             TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
> +    seqlock_write_unlock(&cpu->tb_jmp_cache_sequence);
>  }
>  
>  void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
> 

  parent reply	other threads:[~2015-09-04  8:50 UTC|newest]

Thread overview: 110+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-24  0:23 [Qemu-devel] [RFC 00/38] MTTCG: i386, user+system mode Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 01/38] cpu-exec: add missing mmap_lock in tb_find_slow Emilio G. Cota
2015-09-07 15:33   ` Alex Bennée
2015-08-24  0:23 ` [Qemu-devel] [RFC 02/38] hw/i386/kvmvapic: add missing include of tcg.h Emilio G. Cota
2015-09-07 15:49   ` Alex Bennée
2015-09-07 16:11     ` Paolo Bonzini
2015-08-24  0:23 ` [Qemu-devel] [RFC 03/38] cpu-exec: set current_cpu at cpu_exec() Emilio G. Cota
2015-08-24  1:03   ` Paolo Bonzini
2015-08-25  0:41     ` [Qemu-devel] [PATCH 1/4] cpus: add qemu_cpu_thread_init_common() to avoid code duplication Emilio G. Cota
2015-08-25  0:41       ` [Qemu-devel] [PATCH 2/4] linux-user: add helper to set current_cpu before cpu_loop() Emilio G. Cota
2015-08-25  0:41       ` [Qemu-devel] [PATCH 3/4] linux-user: call rcu_(un)register_thread on thread creation/deletion Emilio G. Cota
2015-08-26  0:22         ` Paolo Bonzini
2015-08-25  0:41       ` [Qemu-devel] [PATCH 4/4] bsd-user: add helper to set current_cpu before cpu_loop() Emilio G. Cota
2015-08-25 18:07         ` Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 04/38] translate-all: remove volatile from have_tb_lock Emilio G. Cota
2015-09-07 15:50   ` Alex Bennée
2015-09-07 16:12     ` Paolo Bonzini
2015-08-24  0:23 ` [Qemu-devel] [RFC 05/38] thread-posix: inline qemu_spin functions Emilio G. Cota
2015-08-24  1:04   ` Paolo Bonzini
2015-08-25  2:30     ` Emilio G. Cota
2015-08-25 19:30       ` Emilio G. Cota
2015-08-25 22:53         ` Paolo Bonzini
2015-08-24  0:23 ` [Qemu-devel] [RFC 06/38] seqlock: add missing 'inline' to seqlock_read_retry Emilio G. Cota
2015-09-07 15:50   ` Alex Bennée
2015-08-24  0:23 ` [Qemu-devel] [RFC 07/38] seqlock: read sequence number atomically Emilio G. Cota
2015-09-07 15:53   ` Alex Bennée
2015-09-07 16:13     ` Paolo Bonzini
2015-08-24  0:23 ` [Qemu-devel] [RFC 08/38] rcu: init rcu_registry_lock after fork Emilio G. Cota
2015-09-08 17:34   ` Alex Bennée
2015-09-08 19:03     ` Emilio G. Cota
2015-09-09  9:35       ` Alex Bennée
2015-08-24  0:23 ` [Qemu-devel] [RFC 09/38] rcu: fix comment with s/rcu_gp_lock/rcu_registry_lock/ Emilio G. Cota
2015-09-10 11:18   ` Alex Bennée
2015-08-24  0:23 ` [Qemu-devel] [RFC 10/38] translate-all: remove obsolete comment about l1_map Emilio G. Cota
2015-09-10 11:59   ` Alex Bennée
2015-08-24  0:23 ` [Qemu-devel] [RFC 11/38] qemu-thread: handle spurious futex_wait wakeups Emilio G. Cota
2015-09-10 13:22   ` Alex Bennée
2015-09-10 17:46     ` Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 12/38] linux-user: call rcu_(un)register_thread on pthread_(exit|create) Emilio G. Cota
2015-08-25  0:45   ` Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 13/38] cputlb: add physical address to CPUTLBEntry Emilio G. Cota
2015-09-10 13:49   ` Alex Bennée
2015-09-10 17:50     ` Emilio G. Cota
2015-09-21  5:01   ` Paolo Bonzini
2015-08-24  0:23 ` [Qemu-devel] [RFC 14/38] softmmu: add helpers to get ld/st physical addresses Emilio G. Cota
2015-08-24  2:02   ` Paolo Bonzini
2015-08-25  2:47     ` Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 15/38] radix-tree: add generic lockless radix tree module Emilio G. Cota
2015-09-10 14:25   ` Alex Bennée
2015-09-10 18:00     ` Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 16/38] aie: add module for Atomic Instruction Emulation Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 17/38] aie: add target helpers Emilio G. Cota
2015-09-17 15:14   ` Alex Bennée
2015-09-21  5:18   ` Paolo Bonzini
2015-09-21 20:59     ` Alex Bennée
2015-08-24  0:23 ` [Qemu-devel] [RFC 18/38] tcg: add fences Emilio G. Cota
2015-09-10 15:28   ` Alex Bennée
2015-08-24  0:23 ` [Qemu-devel] [RFC 19/38] tcg: add tcg_gen_smp_rmb() Emilio G. Cota
2015-09-10 16:01   ` Alex Bennée
2015-09-10 18:05     ` Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 20/38] tcg/i386: implement fences Emilio G. Cota
2015-08-24  1:32   ` Paolo Bonzini
2015-08-25  3:02     ` Emilio G. Cota
2015-08-25 22:55       ` Paolo Bonzini
2015-08-24  0:23 ` [Qemu-devel] [RFC 21/38] target-i386: emulate atomic instructions + barriers using AIE Emilio G. Cota
2015-09-17 15:30   ` Alex Bennée
2015-08-24  0:23 ` [Qemu-devel] [RFC 22/38] cpu: update interrupt_request atomically Emilio G. Cota
2015-08-24  1:09   ` Paolo Bonzini
2015-08-25 20:36     ` Emilio G. Cota
2015-08-25 22:52       ` Paolo Bonzini
2015-08-24  0:23 ` [Qemu-devel] [RFC 23/38] cpu-exec: grab iothread lock during interrupt handling Emilio G. Cota
2015-09-09 10:13   ` Paolo Bonzini
2015-08-24  0:23 ` [Qemu-devel] [RFC 24/38] cpu-exec: reset mmap_lock after exiting the CPU loop Emilio G. Cota
2015-08-24  2:01   ` Paolo Bonzini
2015-08-25 21:16     ` Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 25/38] cpu: add barriers around cpu->tcg_exit_req Emilio G. Cota
2015-08-24  2:01   ` Paolo Bonzini
2015-08-24  0:23 ` [Qemu-devel] [RFC 26/38] cpu: protect tb_jmp_cache with seqlock Emilio G. Cota
2015-08-24  1:14   ` Paolo Bonzini
2015-08-25 21:46     ` Emilio G. Cota
2015-08-25 22:49       ` Paolo Bonzini
2015-09-04  8:50   ` Paolo Bonzini [this message]
2015-09-04 10:04     ` Paolo Bonzini
2015-08-24  0:23 ` [Qemu-devel] [RFC 27/38] cpu-exec: convert tb_invalidated_flag into a per-TB flag Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 28/38] cpu-exec: use RCU to perform lockless TB lookups Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 29/38] tcg: export have_tb_lock Emilio G. Cota
2015-08-24  0:23 ` [Qemu-devel] [RFC 30/38] translate-all: add tb_lock assertions Emilio G. Cota
2015-08-24  0:24 ` [Qemu-devel] [RFC 31/38] cpu: protect l1_map with tb_lock in full-system mode Emilio G. Cota
2015-08-24  1:07   ` Paolo Bonzini
2015-08-25 21:54     ` Emilio G. Cota
2015-08-24  0:24 ` [Qemu-devel] [RFC 32/38] cpu list: convert to RCU QLIST Emilio G. Cota
2015-08-24  0:24 ` [Qemu-devel] [RFC 33/38] cpu: introduce cpu_tcg_sched_work to run work while other CPUs sleep Emilio G. Cota
2015-08-24  1:24   ` Paolo Bonzini
2015-08-25 22:18     ` Emilio G. Cota
2015-08-24  0:24 ` [Qemu-devel] [RFC 34/38] translate-all: use tcg_sched_work for tb_flush Emilio G. Cota
2015-08-24  0:24 ` [Qemu-devel] [RFC 35/38] cputlb: use cpu_tcg_sched_work for tlb_flush_all Emilio G. Cota
2015-08-24  1:29   ` Paolo Bonzini
2015-08-25 22:31     ` Emilio G. Cota
2015-08-26  0:25       ` Paolo Bonzini
2015-09-01 16:10   ` Alex Bennée
2015-09-01 19:38     ` Emilio G. Cota
2015-09-01 20:18       ` Peter Maydell
2015-08-24  0:24 ` [Qemu-devel] [RFC 36/38] cputlb: use tcg_sched_work for tlb_flush_page_all Emilio G. Cota
2015-08-24  0:24 ` [Qemu-devel] [RFC 37/38] cpus: remove async_run_safe_work_on_cpu Emilio G. Cota
2015-08-24  0:24 ` [Qemu-devel] [RFC 38/38] Revert "target-i386: yield to another VCPU on PAUSE" Emilio G. Cota
2015-08-24  1:29   ` Paolo Bonzini
2015-08-24  2:01 ` [Qemu-devel] [RFC 00/38] MTTCG: i386, user+system mode Paolo Bonzini
2015-08-25 22:36   ` Emilio G. Cota
2015-08-24 16:08 ` Artyom Tarasenko
2015-08-24 20:16   ` Emilio G. Cota

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=55E95B44.3060506@redhat.com \
    --to=pbonzini@redhat.com \
    --cc=a.rigo@virtualopensystems.com \
    --cc=alex.bennee@linaro.org \
    --cc=cota@braap.org \
    --cc=fred.konrad@greensocs.com \
    --cc=guillaume.delbergue@greensocs.com \
    --cc=mark.burton@greensocs.com \
    --cc=mttcg@listserver.greensocs.com \
    --cc=qemu-devel@nongnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).