qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Richard Henderson <richard.henderson@linaro.org>
To: Ilya Leoshkevich <iii@linux.ibm.com>,
	Paolo Bonzini <pbonzini@redhat.com>
Cc: "Peter Maydell" <peter.maydell@linaro.org>,
	qemu-devel@nongnu.org,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
	"Christian Borntraeger" <borntraeger@de.ibm.com>,
	"Stefan Hajnoczi" <stefanha@redhat.com>,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Andreas Krebbel" <krebbel@linux.ibm.com>
Subject: Re: [PATCH RFC 1/1] accel/tcg: Clear PAGE_WRITE before translation
Date: Wed, 4 Aug 2021 14:30:50 -1000	[thread overview]
Message-ID: <507a0eae-6dcf-68c0-ee5f-40499788e2b4@linaro.org> (raw)
In-Reply-To: <20210804224633.154083-2-iii@linux.ibm.com>

On 8/4/21 12:46 PM, Ilya Leoshkevich wrote:
> translate_insn() implementations fetch instruction bytes piecemeal,
> which can cause qemu-user to generate inconsistent translations if
> another thread modifies them concurrently [1].
> 
> Fix by marking translation block pages non-writable earlier.
> 
> [1] https://lists.nongnu.org/archive/html/qemu-devel/2021-08/msg00644.html
> 
> Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> ---
>   accel/tcg/translate-all.c    | 59 +++++++++++++++++++++---------------
>   accel/tcg/translator.c       | 26 ++++++++++++++--
>   include/exec/translate-all.h |  1 +
>   3 files changed, 59 insertions(+), 27 deletions(-)
> 
> diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> index bbfcfb698c..fb9ebfad9e 100644
> --- a/accel/tcg/translate-all.c
> +++ b/accel/tcg/translate-all.c
> @@ -1297,31 +1297,8 @@ static inline void tb_page_add(PageDesc *p, TranslationBlock *tb,
>       invalidate_page_bitmap(p);
>   
>   #if defined(CONFIG_USER_ONLY)
> -    if (p->flags & PAGE_WRITE) {
> -        target_ulong addr;
> -        PageDesc *p2;
> -        int prot;
> -
> -        /* force the host page as non writable (writes will have a
> -           page fault + mprotect overhead) */
> -        page_addr &= qemu_host_page_mask;
> -        prot = 0;
> -        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
> -            addr += TARGET_PAGE_SIZE) {
> -
> -            p2 = page_find(addr >> TARGET_PAGE_BITS);
> -            if (!p2) {
> -                continue;
> -            }
> -            prot |= p2->flags;
> -            p2->flags &= ~PAGE_WRITE;
> -          }
> -        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
> -                 (prot & PAGE_BITS) & ~PAGE_WRITE);
> -        if (DEBUG_TB_INVALIDATE_GATE) {
> -            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
> -        }
> -    }
> +    /* translator_loop() must have made all TB pages non-writable */
> +    assert(!(p->flags & PAGE_WRITE));
>   #else
>       /* if some code is already present, then the pages are already
>          protected. So we handle the case where only the first TB is
> @@ -2394,6 +2371,38 @@ int page_check_range(target_ulong start, target_ulong len, int flags)
>       return 0;
>   }
>   
> +void page_protect(tb_page_addr_t page_addr)
> +{
> +    target_ulong addr;
> +    PageDesc *p;
> +    int prot;
> +
> +    p = page_find(page_addr >> TARGET_PAGE_BITS);
> +    if (p && (p->flags & PAGE_WRITE)) {
> +        /*
> +         * Force the host page as non writable (writes will have a page fault +
> +         * mprotect overhead).
> +         */
> +        page_addr &= qemu_host_page_mask;
> +        prot = 0;
> +        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
> +             addr += TARGET_PAGE_SIZE) {
> +
> +            p = page_find(addr >> TARGET_PAGE_BITS);
> +            if (!p) {
> +                continue;
> +            }
> +            prot |= p->flags;
> +            p->flags &= ~PAGE_WRITE;
> +        }
> +        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
> +                 (prot & PAGE_BITS) & ~PAGE_WRITE);
> +        if (DEBUG_TB_INVALIDATE_GATE) {
> +            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT "\n", page_addr);
> +        }
> +    }
> +}
> +
>   /* called from signal handler: invalidate the code and unprotect the
>    * page. Return 0 if the fault was not handled, 1 if it was handled,
>    * and 2 if it was handled but the caller must cause the TB to be
> diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> index c53a7f8e44..bfbe7d7ccf 100644
> --- a/accel/tcg/translator.c
> +++ b/accel/tcg/translator.c
> @@ -14,6 +14,7 @@
>   #include "exec/exec-all.h"
>   #include "exec/gen-icount.h"
>   #include "exec/log.h"
> +#include "exec/translate-all.h"
>   #include "exec/translator.h"
>   #include "exec/plugin-gen.h"
>   #include "sysemu/replay.h"
> @@ -47,6 +48,10 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
>   {
>       uint32_t cflags = tb_cflags(tb);
>       bool plugin_enabled;
> +    bool stop = false;
> +#ifdef CONFIG_USER_ONLY
> +    target_ulong page_addr = -1;
> +#endif
>   
>       /* Initialize DisasContext */
>       db->tb = tb;
> @@ -71,6 +76,21 @@ void translator_loop(const TranslatorOps *ops, DisasContextBase *db,
>       plugin_enabled = plugin_gen_tb_start(cpu, tb, cflags & CF_MEMI_ONLY);
>   
>       while (true) {
> +#ifdef CONFIG_USER_ONLY
> +        /*
> +         * Make the page containing the next instruction non-writable in order
> +         * to get a consistent translation if another thread is modifying the
> +         * code while translate_insn() fetches the instruction bytes piecemeal.
> +         * Writer threads will wait for mmap_lock() in page_unprotect().
> +         */
> +        if ((db->pc_next & TARGET_PAGE_MASK) != page_addr) {
> +            page_addr = db->pc_next & TARGET_PAGE_MASK;
> +            page_protect(page_addr);
> +        }
> +#endif
> +        if (stop) {
> +            break;
> +        }

So... I think this isn't quite right.

(1) If we stop exactly at the page break, this protects the *next* page unnecessarily.

(2) This only protects the page of the start of the insn.  If the instruction crosses the 
page boundary, then the latter part of the insn is still victim to the race we're trying 
to fix.

I think a protect needs to happen in translator_ld*_swap, before reading the data.

I think that the translator_ld*_swap functions should be moved out of 
include/exec/translator.h into accel/tcg/translator.c.

I think that the translator_ld* functions should add a DisasContextBase argument, which 
should then contain the cache for the protection.  This will be a moderately large change, 
but it should be mostly mechanical.

I think that we should initialize the protection cache before translating the first insn, 
outside of that loop.  This will mean that you need not check for two pages 
simultaneously, when a single read crosses the page boundary.  You'll know that (at 
minimum) the first byte of the first read is already covered, and only need to check the 
last byte of each subsequent read.  I think the value you use for your cache should be the 
end of the page for which protection is known to apply, so that the check reduces to

   end = pc + len - 1;
   if (end > dcbase->page_protect_end) {
     dcbase->page_protect_end = end | ~TARGET_PAGE_MASK;
     page_protect(end);
   }


r~


  reply	other threads:[~2021-08-05  0:32 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-04 22:46 [PATCH RFC 0/1] accel/tcg: Clear PAGE_WRITE before translation Ilya Leoshkevich
2021-08-04 22:46 ` [PATCH RFC 1/1] " Ilya Leoshkevich
2021-08-05  0:30   ` Richard Henderson [this message]
2021-08-05 10:56     ` Ilya Leoshkevich
2021-08-05 16:59       ` Richard Henderson
2021-08-05 20:36         ` Ilya Leoshkevich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=507a0eae-6dcf-68c0-ee5f-40499788e2b4@linaro.org \
    --to=richard.henderson@linaro.org \
    --cc=alex.bennee@linaro.org \
    --cc=borntraeger@de.ibm.com \
    --cc=dgilbert@redhat.com \
    --cc=iii@linux.ibm.com \
    --cc=krebbel@linux.ibm.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).