From: "Alex Bennée" <alex.bennee@linaro.org>
To: "Emilio G. Cota" <cota@braap.org>
Cc: QEMU Developers <qemu-devel@nongnu.org>,
MTTCG Devel <mttcg@listserver.greensocs.com>,
Paolo Bonzini <pbonzini@redhat.com>,
Peter Crosthwaite <crosthwaite.peter@gmail.com>,
Richard Henderson <rth@twiddle.net>,
Sergey Fedorov <serge.fdrv@gmail.com>
Subject: Re: [Qemu-devel] [RFC v2] translate-all: protect code_gen_buffer with RCU
Date: Mon, 25 Apr 2016 16:19:59 +0100 [thread overview]
Message-ID: <87vb35emw0.fsf@linaro.org> (raw)
In-Reply-To: <1461468462-31118-1-git-send-email-cota@braap.org>
Emilio G. Cota <cota@braap.org> writes:
> [ Applies on top of bennee/mttcg/enable-mttcg-for-armv7-v1 after
> reverting "translate-all: introduces tb_flush_safe". A trivial
> conflict must be solved after applying. ]
>
> This is a first attempt at making tb_flush not have to stop all CPUs.
> There are issues as pointed out below, but this could be a good start.
>
> Context:
> https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg04658.html
> https://lists.gnu.org/archive/html/qemu-devel/2016-03/msg06942.html
>
> Changes from v1:
> - When a static buffer is used, split it in two instead of using
> a second buffer.
>
> Known issues:
> - Fails Alex' unit test with low enough -tb-size, see
> https://lists.gnu.org/archive/html/qemu-devel/2016-04/msg03465.html
> Seems to work in MTTCG, although I've only tested with tb_lock
> always being taken in tb_find_fast.
With --enable-debug-tcg I get it failing pretty quickly:
#4 0x00005555556d332a in tcg_global_alloc (s=0x555556007ba0 <tcg_ctx>)
at /home/alex/lsrc/qemu/qemu.git/tcg/tcg.c:463
463 tcg_debug_assert(s->nb_globals == s->nb_temps);
(gdb) p s->nb_globals
$1 = 24
(gdb) p s->nb_temps
$2 = 31
Seems odd though, the other threads are all waiting on the tb_lock.
> - Windows; not even compile-tested!
>
> Signed-off-by: Emilio G. Cota <cota@braap.org>
> ---
> translate-all.c | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
> 1 file changed, 133 insertions(+), 13 deletions(-)
>
> diff --git a/translate-all.c b/translate-all.c
> index 8e70583..6830371 100644
> --- a/translate-all.c
> +++ b/translate-all.c
> @@ -535,6 +535,9 @@ static inline void *split_cross_256mb(void *buf1, size_t size1)
> #ifdef USE_STATIC_CODE_GEN_BUFFER
> static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
> __attribute__((aligned(CODE_GEN_ALIGN)));
> +static int static_buf_mask = 1;
> +static void *static_buf1;
> +static void *static_buf2;
>
> # ifdef _WIN32
> static inline void do_protect(void *addr, long size, int prot)
> @@ -577,6 +580,13 @@ static inline void map_none(void *addr, long size)
> }
> # endif /* WIN32 */
>
> +static void map_static_code_gen_buffer(void *buf, size_t size)
> +{
> + map_exec(buf, size);
> + map_none(buf + size, qemu_real_host_page_size);
> + qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
> +}
> +
> static inline void *alloc_code_gen_buffer(void)
> {
> void *buf = static_code_gen_buffer;
> @@ -586,28 +596,41 @@ static inline void *alloc_code_gen_buffer(void)
> full_size = (((uintptr_t)buf + sizeof(static_code_gen_buffer))
> & qemu_real_host_page_mask) - (uintptr_t)buf;
>
> - /* Reserve a guard page. */
> - size = full_size - qemu_real_host_page_size;
> + /*
> + * Reserve two guard pages, one after each of the two buffers:
> + * | buf1 |g1| buf2 |g2|
> + */
> + size = full_size - 2 * qemu_real_host_page_size;
>
> /* Honor a command-line option limiting the size of the buffer. */
> if (size > tcg_ctx.code_gen_buffer_size) {
> size = (((uintptr_t)buf + tcg_ctx.code_gen_buffer_size)
> & qemu_real_host_page_mask) - (uintptr_t)buf;
> }
> - tcg_ctx.code_gen_buffer_size = size;
>
> #ifdef __mips__
> - if (cross_256mb(buf, size)) {
> - buf = split_cross_256mb(buf, size);
> - size = tcg_ctx.code_gen_buffer_size;
> + /*
> + * Pass 'size + page_size', since we want 'buf1 | guard1 | buf2' to be
> + * within the boundary.
> + */
> + if (cross_256mb(buf, size + qemu_real_host_page_size)) {
> + buf = split_cross_256mb(buf, size + qemu_real_host_page_size);
> + size = tcg_ctx.code_gen_buffer_size - qemu_real_host_page_size;
> }
> #endif
>
> - map_exec(buf, size);
> - map_none(buf + size, qemu_real_host_page_size);
> - qemu_madvise(buf, size, QEMU_MADV_HUGEPAGE);
> + /* split the buffer in two */
> + size /= 2;
> + tcg_ctx.code_gen_buffer_size = size;
>
> - return buf;
> + static_buf1 = buf;
> + static_buf2 = buf + size + qemu_real_host_page_mask;
> +
> + map_static_code_gen_buffer(static_buf1, size);
> + map_static_code_gen_buffer(static_buf2, size);
> +
> + assert(static_buf_mask == 1);
> + return static_buf1;
> }
> #elif defined(_WIN32)
> static inline void *alloc_code_gen_buffer(void)
> @@ -825,10 +848,100 @@ static void page_flush_tb(void)
> }
> }
>
> +#ifdef USE_STATIC_CODE_GEN_BUFFER
> +
> +struct code_gen_desc {
> + struct rcu_head rcu;
> + int clear_bit;
> +};
> +
> +static void code_gen_buffer_clear(struct rcu_head *rcu)
> +{
> + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
> +
> + tb_lock();
> + static_buf_mask &= ~desc->clear_bit;
> + tb_unlock();
> + g_free(desc);
> +}
> +
> +static void *code_gen_buffer_replace(void)
> +{
> + struct code_gen_desc *desc = g_malloc0(sizeof(*desc));
> +
> + /*
> + * If both bits are set, we're having two concurrent flushes. This
> + * can easily happen if the buffers are heavily undersized.
> + */
> + assert(static_buf_mask == 1 || static_buf_mask == 2);
> +
> + desc->clear_bit = static_buf_mask;
> + call_rcu1(&desc->rcu, code_gen_buffer_clear);
> +
> + if (static_buf_mask == 1) {
> + static_buf_mask |= 2;
> + return static_buf2;
> + }
> + static_buf_mask |= 1;
> + return static_buf1;
> +}
> +
> +#elif defined(_WIN32)
> +
> +struct code_gen_desc {
> + struct rcu_head rcu;
> + void *buf;
> +};
> +
> +static void code_gen_buffer_vfree(struct rcu_head *rcu)
> +{
> + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
> +
> + VirtualFree(desc->buf, 0, MEM_RELEASE);
> + g_free(desc);
> +}
> +
> +static void *code_gen_buffer_replace(void)
> +{
> + struct code_gen_desc *desc;
> +
> + desc = g_malloc0(sizeof(*desc));
> + desc->buf = tcg_ctx.code_gen_buffer;
> + call_rcu1(&desc->rcu, code_gen_buffer_vfree);
> +
> + return alloc_code_gen_buffer();
> +}
> +
> +#else /* UNIX, dynamically-allocated code buffer */
> +
> +struct code_gen_desc {
> + struct rcu_head rcu;
> + void *buf;
> + size_t size;
> +};
> +
> +static void code_gen_buffer_unmap(struct rcu_head *rcu)
> +{
> + struct code_gen_desc *desc = container_of(rcu, struct code_gen_desc, rcu);
> +
> + munmap(desc->buf, desc->size + qemu_real_host_page_size);
> + g_free(desc);
> +}
> +
> +static void *code_gen_buffer_replace(void)
> +{
> + struct code_gen_desc *desc;
> +
> + desc = g_malloc0(sizeof(*desc));
> + desc->buf = tcg_ctx.code_gen_buffer;
> + desc->size = tcg_ctx.code_gen_buffer_size;
> + call_rcu1(&desc->rcu, code_gen_buffer_unmap);
> +
> + return alloc_code_gen_buffer();
> +}
> +#endif /* USE_STATIC_CODE_GEN_BUFFER */
> +
> /* flush all the translation blocks */
> -/* XXX: tb_flush is currently not thread safe. System emulation calls it only
> - * with tb_lock taken or from safe_work, so no need to take tb_lock here.
> - */
> void tb_flush(CPUState *cpu)
> {
> #if defined(DEBUG_FLUSH)
> @@ -852,10 +965,17 @@ void tb_flush(CPUState *cpu)
> memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
> page_flush_tb();
>
> + tcg_ctx.code_gen_buffer = code_gen_buffer_replace();
> tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
> + tcg_prologue_init(&tcg_ctx);
> /* XXX: flush processor icache at this point if cache flush is
> expensive */
> tcg_ctx.tb_ctx.tb_flush_count++;
> +
> + /* exit all CPUs so that the old buffer is quickly cleared. */
> + CPU_FOREACH(cpu) {
> + cpu_exit(cpu);
> + }
> }
>
> #ifdef DEBUG_TB_CHECK
--
Alex Bennée
next prev parent reply other threads:[~2016-04-25 15:20 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-04-22 0:06 [Qemu-devel] [RFC] translate-all: protect code_gen_buffer with RCU Emilio G. Cota
2016-04-22 14:41 ` Alex Bennée
2016-04-22 14:47 ` Alex Bennée
2016-04-24 3:20 ` Emilio G. Cota
2016-04-25 8:35 ` Alex Bennée
2016-04-22 18:25 ` Richard Henderson
2016-04-24 3:27 ` [Qemu-devel] [RFC v2] " Emilio G. Cota
2016-04-24 18:12 ` Richard Henderson
2016-04-25 15:19 ` Alex Bennée [this message]
2016-04-25 15:25 ` Emilio G. Cota
2016-04-25 23:46 ` [Qemu-devel] [RFC v3] " Emilio G. Cota
2016-04-26 4:48 ` Richard Henderson
2016-04-26 6:35 ` Alex Bennée
2016-04-26 15:42 ` Richard Henderson
2016-04-26 6:32 ` Alex Bennée
2016-04-30 3:40 ` Emilio G. Cota
2016-05-09 11:21 ` Paolo Bonzini
2016-05-09 11:50 ` Alex Bennée
2016-05-09 13:55 ` Paolo Bonzini
2016-05-09 15:05 ` Alex Bennée
2016-05-09 17:07 ` Emilio G. Cota
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=87vb35emw0.fsf@linaro.org \
--to=alex.bennee@linaro.org \
--cc=cota@braap.org \
--cc=crosthwaite.peter@gmail.com \
--cc=mttcg@listserver.greensocs.com \
--cc=pbonzini@redhat.com \
--cc=qemu-devel@nongnu.org \
--cc=rth@twiddle.net \
--cc=serge.fdrv@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.