From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:55363) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZPsck-0005MY-3g for qemu-devel@nongnu.org; Thu, 13 Aug 2015 09:32:51 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1ZPscg-0001Nd-2l for qemu-devel@nongnu.org; Thu, 13 Aug 2015 09:32:50 -0400 Received: from greensocs.com ([193.104.36.180]:45077) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1ZPscf-0001NY-P3 for qemu-devel@nongnu.org; Thu, 13 Aug 2015 09:32:46 -0400 Message-ID: <55CC9C6A.4070309@greensocs.com> Date: Thu, 13 Aug 2015 15:32:26 +0200 From: Frederic Konrad MIME-Version: 1.0 References: <1439397664-70734-1-git-send-email-pbonzini@redhat.com> <1439397664-70734-12-git-send-email-pbonzini@redhat.com> <55CC92CC.4020201@greensocs.com> <55CC9496.8050905@redhat.com> In-Reply-To: <55CC9496.8050905@redhat.com> Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: 7bit Subject: Re: [Qemu-devel] [PATCH 11/10] tcg: comment on which functions have to be called with tb_lock held List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: Paolo Bonzini , qemu-devel@nongnu.org Cc: mttcg@greensocs.com On 13/08/2015 14:59, Paolo Bonzini wrote: > > On 13/08/2015 14:51, Frederic Konrad wrote: >>> diff --git a/include/qom/cpu.h b/include/qom/cpu.h >>> index 77bbff2..56b1f4d 100644 >>> --- a/include/qom/cpu.h >>> +++ b/include/qom/cpu.h >>> @@ -285,7 +285,10 @@ struct CPUState { >>> void *env_ptr; /* CPUArchState */ >>> struct TranslationBlock *current_tb; >>> + >>> + /* Protected by tb_lock. */ >>> struct TranslationBlock *tb_jmp_cache[TB_JMP_CACHE_SIZE]; >> This is temporary as a first step? > Yes, I now saw that tb_lock has a huge contention in tb_find_fast. :) Yes it is just enormous. Makes MTTCG 2x slower than upstream :). 
> I've now extracted parts of your patch "tcg: protect TBContext with > tb_lock" into a separate "tcg: move tb_find_fast outside the tb_lock > critical section" that also applies to user-mode emulation. That way I > get good scalability on Dhrystone, same as with your branch. I guess that is with the whole tlb/tb flush made safe? That theoretically protects tb_jmp_cache (or at least lets only the right thread access it). The drawback of all that is I'm not sure this is faster when we have a lot of context switches. For tb_flush it's not really a problem as it happens approximately never, but tb_invalidate and tlb_*_flush are more frequent. Fred > > Do you agree with the first 10 patches as a first step towards > upstreaming the MTTCG work? > > Paolo > >>> + >>> struct GDBRegisterState *gdb_regs; >>> int gdb_num_regs; >>> int gdb_num_g_regs; >>> diff --git a/tcg/tcg.h b/tcg/tcg.h >>> index 0ae648f..a2cad31 100644 >>> --- a/tcg/tcg.h >>> +++ b/tcg/tcg.h >>> @@ -590,6 +590,7 @@ static inline bool tcg_op_buf_full(void) >>> /* pool based memory allocation */ >>> +/* tb_lock must be held for tcg_malloc_internal. */ >>> void *tcg_malloc_internal(TCGContext *s, int size); >>> void tcg_pool_reset(TCGContext *s); >>> void tcg_pool_delete(TCGContext *s); >>> @@ -598,6 +599,7 @@ void tb_lock(void); >>> void tb_unlock(void); >>> void tb_lock_reset(void); >>> +/* Called with tb_lock held. */ >>> static inline void *tcg_malloc(int size) >>> { >>> TCGContext *s = &tcg_ctx; >>> diff --git a/translate-all.c b/translate-all.c >>> index edb9cb1..17d3cd1 100644 >>> --- a/translate-all.c >>> +++ b/translate-all.c >>> @@ -237,6 +237,7 @@ int cpu_gen_code(CPUArchState *env, >>> TranslationBlock *tb, int *gen_code_size_ptr >>> } >>> /* The cpu state corresponding to 'searched_pc' is restored. >>> + * Called with tb_lock held. 
>>> */ >>> static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock >>> *tb, >>> uintptr_t searched_pc) >>> @@ -424,6 +425,7 @@ static void page_init(void) >>> } >>> /* If alloc=1: >>> + * Called with tb_lock held for system emulation. >>> * Called with mmap_lock held for user-mode emulation. >>> */ >>> static PageDesc *page_find_alloc(tb_page_addr_t index, int alloc) >>> @@ -734,8 +736,12 @@ bool tcg_enabled(void) >>> return tcg_ctx.code_gen_buffer != NULL; >>> } >>> -/* Allocate a new translation block. Flush the translation buffer if >>> - too many translation blocks or too much generated code. */ >>> +/* >>> + * Allocate a new translation block. Flush the translation buffer if >>> + * too many translation blocks or too much generated code. >>> + * >>> + * Called with tb_lock held. >>> + */ >>> static TranslationBlock *tb_alloc(target_ulong pc) >>> { >> There is the famous tb_flush which needs to be called with tb_lock held >> as well. >> There are several places where it's called. >> >>> TranslationBlock *tb; >>> @@ -751,6 +757,7 @@ static TranslationBlock *tb_alloc(target_ulong pc) >>> return tb; >>> } >>> +/* Called with tb_lock held. */ >>> void tb_free(TranslationBlock *tb) >>> { >>> /* In practice this is mostly used for single use temporary TB >>> @@ -859,7 +866,10 @@ static void tb_invalidate_check(target_ulong >>> address) >>> } >>> } >>> -/* verify that all the pages have correct rights for code */ >>> +/* verify that all the pages have correct rights for code >>> + * >>> + * Called with tb_lock held. >>> + */ >>> static void tb_page_check(void) >>> { >>> TranslationBlock *tb; >>> @@ -947,7 +957,10 @@ static inline void tb_reset_jump(TranslationBlock >>> *tb, int n) >>> tb_set_jmp_target(tb, n, (uintptr_t)(tb->tc_ptr + >>> tb->tb_next_offset[n])); >>> } >>> -/* invalidate one TB */ >>> +/* invalidate one TB >>> + * >>> + * Called with tb_lock held. 
>>> + */ >>> void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) >>> { >>> CPUState *cpu; >>> @@ -1036,7 +1049,7 @@ static void build_page_bitmap(PageDesc *p) >>> } >>> #endif >>> -/* Called with mmap_lock held for user mode emulation. */ >>> +/* Called with tb_lock held, and mmap_lock too for user mode >>> emulation. */ >>> TranslationBlock *tb_gen_code(CPUState *cpu, >>> target_ulong pc, target_ulong cs_base, >>> int flags, int cflags) >>> @@ -1234,7 +1247,9 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t >>> start, int len) >>> } >>> if (!p->code_bitmap && >>> ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) { >>> - /* build code bitmap */ >>> + /* build code bitmap. FIXME: writes should be protected by >>> + * tb_lock, reads by tb_lock or RCU. >>> + */ >>> build_page_bitmap(p); >>> } >>> if (p->code_bitmap) { >>> @@ -1324,6 +1339,7 @@ static void >>> tb_invalidate_phys_page(tb_page_addr_t addr, >>> /* add the tb in the target page and protect it if necessary >>> * >>> + * Called with tb_lock held. >>> * Called with mmap_lock held for user-mode emulation. >>> */ >>> static inline void tb_alloc_page(TranslationBlock *tb, >>> @@ -1383,6 +1399,7 @@ static inline void >>> tb_alloc_page(TranslationBlock *tb, >>> /* add a new TB and link it to the physical page tables. phys_page2 is >>> * (-1) to indicate that only one page contains the TB. >>> * >>> + * Called with tb_lock held. >>> * Called with mmap_lock held for user-mode emulation. >>> */ >>> static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, >>> @@ -1423,7 +1440,10 @@ static void tb_link_page(TranslationBlock *tb, >>> tb_page_addr_t phys_pc, >>> } >>> /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < >>> - tb[1].tc_ptr. Return NULL if not found */ >>> + * tb[1].tc_ptr. Return NULL if not found >>> + * >>> + * Called with tb_lock held. 
>>> + */ >>> static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) >>> { >>> int m_min, m_max, m; >>> @@ -1476,6 +1496,7 @@ void tb_invalidate_phys_addr(AddressSpace *as, >>> hwaddr addr) >>> } >>> #endif /* !defined(CONFIG_USER_ONLY) */ >>> +/* Called with tb_lock held. */ >>> void tb_check_watchpoint(CPUState *cpu) >>> { >>> TranslationBlock *tb;