From mboxrd@z Thu Jan  1 00:00:00 1970
Received: from eggs.gnu.org ([2001:4830:134:3::10]:45691)
	by lists.gnu.org with esmtp (Exim 4.71)
	(envelope-from <fred.konrad@greensocs.com>) id 1ZPs4H-00075l-4d
	for qemu-devel@nongnu.org; Thu, 13 Aug 2015 08:57:14 -0400
Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71)
	(envelope-from <fred.konrad@greensocs.com>) id 1ZPs4D-0001hG-SU
	for qemu-devel@nongnu.org; Thu, 13 Aug 2015 08:57:13 -0400
Received: from greensocs.com ([193.104.36.180]:36174)
	by eggs.gnu.org with esmtp (Exim 4.71)
	(envelope-from <fred.konrad@greensocs.com>) id 1ZPs4D-0001hB-IV
	for qemu-devel@nongnu.org; Thu, 13 Aug 2015 08:57:09 -0400
Message-ID: <55CC9422.3070602@greensocs.com>
Date: Thu, 13 Aug 2015 14:57:06 +0200
From: Frederic Konrad <fred.konrad@greensocs.com>
MIME-Version: 1.0
References: <1439397664-70734-1-git-send-email-pbonzini@redhat.com>
	<1439397664-70734-13-git-send-email-pbonzini@redhat.com>
In-Reply-To: <1439397664-70734-13-git-send-email-pbonzini@redhat.com>
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit
Subject: Re: [Qemu-devel] [PATCH 12/10] tcg: protect TBContext with tb_lock.
List-Id: <qemu-devel.nongnu.org>
List-Unsubscribe: <https://lists.nongnu.org/mailman/options/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=unsubscribe>
List-Archive: <http://lists.nongnu.org/archive/html/qemu-devel>
List-Post: <mailto:qemu-devel@nongnu.org>
List-Help: <mailto:qemu-devel-request@nongnu.org?subject=help>
List-Subscribe: <https://lists.nongnu.org/mailman/listinfo/qemu-devel>,
	<mailto:qemu-devel-request@nongnu.org?subject=subscribe>
To: Paolo Bonzini <pbonzini@redhat.com>, qemu-devel@nongnu.org
Cc: mttcg@greensocs.com

On 12/08/2015 18:41, Paolo Bonzini wrote:
> From: KONRAD Frederic <fred.konrad@greensocs.com>
>
> This protects TBContext with tb_lock to make tb_* thread safe.
>
> We can still have issue with tb_flush in case of multithread TCG:
> another CPU can be executing code during a flush.
>
> This can be fixed later by making all other TCG thread exiting before calling
> tb_flush().
>
> Signed-off-by: KONRAD Frederic <fred.konrad@greensocs.com>
>
> Changes:
> V6 -> V7:
>    * Drop a tb_lock in already locked restore_state_to_opc.
> V5 -> V6:
>    * Drop a tb_lock arround tb_find_fast in cpu-exec.c.
> Message-Id: <1439220437-23957-8-git-send-email-fred.konrad@greensocs.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>   cpu-exec.c         |  6 ++++++
>   exec.c             |  3 +++
>   hw/i386/kvmvapic.c |  2 ++
>   translate-all.c    | 38 ++++++++++++++++++++++++++++++++------
>   4 files changed, 43 insertions(+), 6 deletions(-)
>
> diff --git a/cpu-exec.c b/cpu-exec.c
> index e712c6a..89b66f5 100644
> --- a/cpu-exec.c
> +++ b/cpu-exec.c
> @@ -241,16 +241,22 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
>       if (max_cycles > CF_COUNT_MASK)
>           max_cycles = CF_COUNT_MASK;
>   
> +    tb_lock();
>       tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
>                        max_cycles | CF_NOCACHE);
tb_gen_code() calls tb_alloc() which calls tb_flush() we end in a double 
tb_lock here.
But that's probably not really important here as we want to either do a 
tb_flush
outside cpu_exec or realloc an other code buffer.

Fred
>       tb->orig_tb = tcg_ctx.tb_ctx.tb_invalidated_flag ? NULL : orig_tb;
>       cpu->current_tb = tb;
> +    tb_unlock();
> +
>       /* execute the generated code */
>       trace_exec_tb_nocache(tb, tb->pc);
>       cpu_tb_exec(cpu, tb->tc_ptr);
> +
> +    tb_lock();
>       cpu->current_tb = NULL;
>       tb_phys_invalidate(tb, -1);
>       tb_free(tb);
> +    tb_unlock();
>   }
>   #endif
>   
> diff --git a/exec.c b/exec.c
> index 856a859..9083307 100644
> --- a/exec.c
> +++ b/exec.c
> @@ -1948,6 +1948,9 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
>               wp->hitattrs = attrs;
>               if (!cpu->watchpoint_hit) {
>                   cpu->watchpoint_hit = wp;
> +
> +                /* Unlocked by cpu_loop_exit or cpu_resume_from_signal.  */
> +                tb_lock();
>                   tb_check_watchpoint(cpu);
>                   if (wp->flags & BP_STOP_BEFORE_ACCESS) {
>                       cpu->exception_index = EXCP_DEBUG;
> diff --git a/hw/i386/kvmvapic.c b/hw/i386/kvmvapic.c
> index c6d34b2..d823e15 100644
> --- a/hw/i386/kvmvapic.c
> +++ b/hw/i386/kvmvapic.c
> @@ -445,6 +445,8 @@ static void patch_instruction(VAPICROMState *s, X86CPU *cpu, target_ulong ip)
>       resume_all_vcpus();
>   
>       if (!kvm_enabled()) {
> +        /* Unlocked by cpu_resume_from_signal.  */
> +        tb_lock();
>           cs->current_tb = NULL;
>           tb_gen_code(cs, current_pc, current_cs_base, current_flags, 1);
>           cpu_resume_from_signal(cs, NULL);
> diff --git a/translate-all.c b/translate-all.c
> index 17d3cd1..7a4f8f1 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -301,6 +301,8 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
>   {
>       TranslationBlock *tb;
>   
> +    tb_lock();
> +
>       tb = tb_find_pc(retaddr);
>       if (tb) {
>           cpu_restore_state_from_tb(cpu, tb, retaddr);
> @@ -310,8 +312,12 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
>               tb_phys_invalidate(tb, -1);
>               tb_free(tb);
>           }
> +
> +        tb_unlock();
>           return true;
>       }
> +
> +    tb_unlock();
>       return false;
>   }
>   
> @@ -820,6 +826,8 @@ static void page_flush_tb(void)
>   /* XXX: tb_flush is currently not thread safe */
>   void tb_flush(CPUState *cpu)
>   {
> +    tb_lock();
> +
>   #if defined(DEBUG_FLUSH)
>       printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
>              (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
> @@ -844,6 +852,8 @@ void tb_flush(CPUState *cpu)
>       /* XXX: flush processor icache at this point if cache flush is
>          expensive */
>       tcg_ctx.tb_ctx.tb_flush_count++;
> +
> +    tb_unlock();
>   }
>   
>   #ifdef DEBUG_TB_CHECK
> @@ -1151,6 +1161,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>       /* we remove all the TBs in the range [start, end[ */
>       /* XXX: see if in some cases it could be faster to invalidate all
>          the code */
> +    tb_lock();
>       tb = p->first_tb;
>       while (tb != NULL) {
>           n = (uintptr_t)tb & 3;
> @@ -1218,12 +1229,13 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
>       if (current_tb_modified) {
>           /* we generate a block containing just the instruction
>              modifying the memory. It will ensure that it cannot modify
> -           itself */
> +           itself.  cpu_resume_from_signal unlocks tb_lock.  */
>           cpu->current_tb = NULL;
>           tb_gen_code(cpu, current_pc, current_cs_base, current_flags, 1);
>           cpu_resume_from_signal(cpu, NULL);
>       }
>   #endif
> +    tb_unlock();
>   }
>   
>   #ifdef CONFIG_SOFTMMU
> @@ -1290,6 +1302,8 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
>       if (!p) {
>           return;
>       }
> +
> +    tb_lock();
>       tb = p->first_tb;
>   #ifdef TARGET_HAS_PRECISE_SMC
>       if (tb && pc != 0) {
> @@ -1331,9 +1345,12 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
>           if (locked) {
>               mmap_unlock();
>           }
> +
> +        /* tb_lock released by cpu_resume_from_signal.  */
>           cpu_resume_from_signal(cpu, puc);
>       }
>   #endif
> +    tb_unlock();
>   }
>   #endif
>   
> @@ -1563,6 +1580,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
>       target_ulong pc, cs_base;
>       uint64_t flags;
>   
> +    tb_lock();
>       tb = tb_find_pc(retaddr);
>       if (!tb) {
>           cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
> @@ -1614,11 +1632,15 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
>       /* FIXME: In theory this could raise an exception.  In practice
>          we have already translated the block once so it's probably ok.  */
>       tb_gen_code(cpu, pc, cs_base, flags, cflags);
> -    /* TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
> -       the first in the TB) then we end up generating a whole new TB and
> -       repeating the fault, which is horribly inefficient.
> -       Better would be to execute just this insn uncached, or generate a
> -       second new TB.  */
> +
> +    /* This unlocks the tb_lock.
> +     *
> +     * TODO: If env->pc != tb->pc (i.e. the faulting instruction was not
> +     * the first in the TB) then we end up generating a whole new TB and
> +     * repeating the fault, which is horribly inefficient.
> +     * Better would be to execute just this insn uncached, or generate a
> +     * second new TB.
> +     */
>       cpu_resume_from_signal(cpu, NULL);
>   }
>   
> @@ -1643,6 +1665,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
>       int direct_jmp_count, direct_jmp2_count, cross_page;
>       TranslationBlock *tb;
>   
> +    tb_lock();
> +
>       target_code_size = 0;
>       max_target_code_size = 0;
>       cross_page = 0;
> @@ -1698,6 +1722,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
>               tcg_ctx.tb_ctx.tb_phys_invalidate_count);
>       cpu_fprintf(f, "TLB flush count     %d\n", tlb_flush_count);
>       tcg_dump_info(f, cpu_fprintf);
> +
> +    tb_unlock();
>   }
>   
>   void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf)