From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from eggs.gnu.org ([2001:4830:134:3::10]:52399) by lists.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1atOc7-0005ow-Hk for qemu-devel@nongnu.org; Thu, 21 Apr 2016 20:06:28 -0400 Received: from Debian-exim by eggs.gnu.org with spam-scanned (Exim 4.71) (envelope-from ) id 1atOc4-0000Fs-B4 for qemu-devel@nongnu.org; Thu, 21 Apr 2016 20:06:27 -0400 Received: from out2-smtp.messagingengine.com ([66.111.4.26]:48148) by eggs.gnu.org with esmtp (Exim 4.71) (envelope-from ) id 1atOc4-0000Fo-6e for qemu-devel@nongnu.org; Thu, 21 Apr 2016 20:06:24 -0400 Received: from compute4.internal (compute4.nyi.internal [10.202.2.44]) by mailout.nyi.internal (Postfix) with ESMTP id 1D57E221B8 for ; Thu, 21 Apr 2016 20:06:24 -0400 (EDT) From: "Emilio G. Cota" Date: Thu, 21 Apr 2016 20:06:23 -0400 Message-Id: <1461283583-2833-1-git-send-email-cota@braap.org> Subject: [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU List-Id: List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , To: QEMU Developers , MTTCG Devel Cc: =?UTF-8?q?Alex=20Benn=C3=A9e?= , Paolo Bonzini , Peter Crosthwaite , Richard Henderson , Sergey Fedorov This is a first attempt at making tb_flush not have to stop all CPUs. There are known issues, listed below, but this could be a good start. Context: https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg04658.html https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg06942.html Known issues: - Basically compile-tested only, since I've only run this with single-threaded TCG; I also tried running it with linux-user, but in order to trigger tb_flush I had to make code_gen_buffer so small that the CPU calling tb_flush would immediately fill the 2nd buffer, triggering the assert. If you have a working multi-threaded workload that would be a good test for this, please let me know. - Windows; not even compile-tested! Signed-off-by: Emilio G. 
Cota --- translate-all.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 5 deletions(-) diff --git a/translate-all.c b/translate-all.c index bba9b62..4c14b4d 100644 --- a/translate-all.c +++ b/translate-all.c @@ -536,8 +536,13 @@ static inline void *split_cross_256mb(void *buf1, size_t size1) #endif #ifdef USE_STATIC_CODE_GEN_BUFFER -static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE] +static uint8_t static_code_gen_buffer1[DEFAULT_CODE_GEN_BUFFER_SIZE] __attribute__((aligned(CODE_GEN_ALIGN))); +static uint8_t static_code_gen_buffer2[DEFAULT_CODE_GEN_BUFFER_SIZE] + __attribute__((aligned(CODE_GEN_ALIGN))); +static int static_buf_mask = 1; +static void *static_buf1; +static void *static_buf2; # ifdef _WIN32 static inline void do_protect(void *addr, long size, int prot) @@ -580,13 +585,12 @@ static inline void map_none(void *addr, long size) } # endif /* WIN32 */ -static inline void *alloc_code_gen_buffer(void) +static void *alloc_static_code_gen_buffer(void *buf) { - void *buf = static_code_gen_buffer; size_t full_size, size; /* The size of the buffer, rounded down to end on a page boundary. */ - full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer)) + full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer1)) & qemu_real_host_page_mask) - (uintptr_t)buf; /* Reserve a guard page. 
*/ @@ -612,6 +616,15 @@ static inline void *alloc_code_gen_buffer(void) return buf; } + +static inline void *alloc_code_gen_buffer(void) +{ + static_buf1 = alloc_static_code_gen_buffer(static_code_gen_buffer1); + static_buf2 = alloc_static_code_gen_buffer(static_code_gen_buffer2); + + assert(static_buf_mask == 1); + return static_buf1; +} #elif defined(_WIN32) static inline void *alloc_code_gen_buffer(void) { @@ -829,8 +842,100 @@ static void page_flush_tb(void) } } +#ifdef USE_STATIC_CODE_GEN_BUFFER + +struct code_gen_desc { + struct rcu_head rcu; + int clear_bit; +}; + +static void code_gen_buffer_clear(struct rcu_head *rcu) +{ + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu); + + tb_lock(); + static_buf_mask &= ~desc->clear_bit; + tb_unlock(); + g_free(desc); +} + +static void *code_gen_buffer_replace(void) +{ + struct code_gen_desc *desc = g_malloc0(sizeof(*desc)); + + /* + * If both bits are set, we're having two concurrent flushes. This + * can easily happen if the buffers are heavily undersized. 
+ */ + assert(static_buf_mask == 1 || static_buf_mask == 2); + + desc->clear_bit = static_buf_mask; + call_rcu1(&desc->rcu, code_gen_buffer_clear); + + if (static_buf_mask == 1) { + static_buf_mask |= 2; + return static_buf2; + } + static_buf_mask |= 1; + return static_buf1; +} + +#elif defined(_WIN32) + +struct code_gen_desc { + struct rcu_head rcu; + void *buf; +}; + +static void code_gen_buffer_vfree(struct rcu_head *rcu) +{ + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu); + + VirtualFree(desc->buf, 0, MEM_RELEASE); + g_free(desc); +} + +static void *code_gen_buffer_replace(void) +{ + struct code_gen_desc *desc; + + desc = g_malloc0(sizeof(*desc)); + desc->buf = tcg_ctx.code_gen_buffer; + call_rcu1(&desc->rcu, code_gen_buffer_vfree); + + return alloc_code_gen_buffer(); +} + +#else /* UNIX, dynamically-allocated code buffer */ + +struct code_gen_desc { + struct rcu_head rcu; + void *buf; + size_t size; +}; + +static void code_gen_buffer_unmap(struct rcu_head *rcu) +{ + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu); + + munmap(desc->buf, desc->size + qemu_real_host_page_size); + g_free(desc); +} + +static void *code_gen_buffer_replace(void) +{ + struct code_gen_desc *desc; + + desc = g_malloc0(sizeof(*desc)); + desc->buf = tcg_ctx.code_gen_buffer; + desc->size = tcg_ctx.code_gen_buffer_size; + call_rcu1(&desc->rcu, code_gen_buffer_unmap); + + return alloc_code_gen_buffer(); +} +#endif /* USE_STATIC_CODE_GEN_BUFFER */ + /* flush all the translation blocks */ -/* XXX: tb_flush is currently not thread safe */ void tb_flush(CPUState *cpu) { #if defined(DEBUG_FLUSH) @@ -853,10 +958,17 @@ void tb_flush(CPUState *cpu) qht_reset_size(&tcg_ctx.tb_ctx.htable, CODE_GEN_HTABLE_SIZE); page_flush_tb(); + tcg_ctx.code_gen_buffer = code_gen_buffer_replace(); tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; + tcg_prologue_init(&tcg_ctx); /* XXX: flush processor icache at this point if cache flush is expensive */ 
tcg_ctx.tb_ctx.tb_flush_count++; + + /* exit all CPUs so that the old buffer is quickly cleared. */ + CPU_FOREACH(cpu) { + cpu_exit(cpu); + } } #ifdef DEBUG_TB_CHECK -- 2.5.0