qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Ilya Leoshkevich <iii@linux.ibm.com>
To: Richard Henderson <richard.henderson@linaro.org>,
	Paolo Bonzini <pbonzini@redhat.com>
Cc: "Peter Maydell" <peter.maydell@linaro.org>,
	qemu-devel@nongnu.org,
	"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
	"Christian Borntraeger" <borntraeger@de.ibm.com>,
	"Stefan Hajnoczi" <stefanha@redhat.com>,
	"Alex Bennée" <alex.bennee@linaro.org>,
	"Andreas Krebbel" <krebbel@linux.ibm.com>
Subject: Re: [PATCH RFC 1/1] accel/tcg: Clear PAGE_WRITE before translation
Date: Thu, 05 Aug 2021 12:56:19 +0200	[thread overview]
Message-ID: <8481e2b62d5bf3295ffb309ce54fbdaf42fcef0b.camel@linux.ibm.com> (raw)
In-Reply-To: <507a0eae-6dcf-68c0-ee5f-40499788e2b4@linaro.org>

On Wed, 2021-08-04 at 14:30 -1000, Richard Henderson wrote:
> On 8/4/21 12:46 PM, Ilya Leoshkevich wrote:
> > translate_insn() implementations fetch instruction bytes piecemeal,
> > which can cause qemu-user to generate inconsistent translations if
> > another thread modifies them concurrently [1].
> > 
> > Fix by marking translation block pages non-writable earlier.
> > 
> > [1] 
> > https://lists.nongnu.org/archive/html/qemu-devel/2021-08/msg00644.html
> > 
> > Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
> > ---
> >   accel/tcg/translate-all.c    | 59 +++++++++++++++++++++----------
> > -----
> >   accel/tcg/translator.c       | 26 ++++++++++++++--
> >   include/exec/translate-all.h |  1 +
> >   3 files changed, 59 insertions(+), 27 deletions(-)
> > 
> > diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
> > index bbfcfb698c..fb9ebfad9e 100644
> > --- a/accel/tcg/translate-all.c
> > +++ b/accel/tcg/translate-all.c
> > @@ -1297,31 +1297,8 @@ static inline void tb_page_add(PageDesc *p,
> > TranslationBlock *tb,
> >       invalidate_page_bitmap(p);
> >   
> >   #if defined(CONFIG_USER_ONLY)
> > -    if (p->flags & PAGE_WRITE) {
> > -        target_ulong addr;
> > -        PageDesc *p2;
> > -        int prot;
> > -
> > -        /* force the host page as non writable (writes will have a
> > -           page fault + mprotect overhead) */
> > -        page_addr &= qemu_host_page_mask;
> > -        prot = 0;
> > -        for (addr = page_addr; addr < page_addr +
> > qemu_host_page_size;
> > -            addr += TARGET_PAGE_SIZE) {
> > -
> > -            p2 = page_find(addr >> TARGET_PAGE_BITS);
> > -            if (!p2) {
> > -                continue;
> > -            }
> > -            prot |= p2->flags;
> > -            p2->flags &= ~PAGE_WRITE;
> > -          }
> > -        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
> > -                 (prot & PAGE_BITS) & ~PAGE_WRITE);
> > -        if (DEBUG_TB_INVALIDATE_GATE) {
> > -            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT
> > "\n", page_addr);
> > -        }
> > -    }
> > +    /* translator_loop() must have made all TB pages non-writable
> > */
> > +    assert(!(p->flags & PAGE_WRITE));
> >   #else
> >       /* if some code is already present, then the pages are
> > already
> >          protected. So we handle the case where only the first TB
> > is
> > @@ -2394,6 +2371,38 @@ int page_check_range(target_ulong start,
> > target_ulong len, int flags)
> >       return 0;
> >   }
> >   
> > +void page_protect(tb_page_addr_t page_addr)
> > +{
> > +    target_ulong addr;
> > +    PageDesc *p;
> > +    int prot;
> > +
> > +    p = page_find(page_addr >> TARGET_PAGE_BITS);
> > +    if (p && (p->flags & PAGE_WRITE)) {
> > +        /*
> > +         * Force the host page as non writable (writes will have a
> > page fault +
> > +         * mprotect overhead).
> > +         */
> > +        page_addr &= qemu_host_page_mask;
> > +        prot = 0;
> > +        for (addr = page_addr; addr < page_addr +
> > qemu_host_page_size;
> > +             addr += TARGET_PAGE_SIZE) {
> > +
> > +            p = page_find(addr >> TARGET_PAGE_BITS);
> > +            if (!p) {
> > +                continue;
> > +            }
> > +            prot |= p->flags;
> > +            p->flags &= ~PAGE_WRITE;
> > +        }
> > +        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
> > +                 (prot & PAGE_BITS) & ~PAGE_WRITE);
> > +        if (DEBUG_TB_INVALIDATE_GATE) {
> > +            printf("protecting code page: 0x" TB_PAGE_ADDR_FMT
> > "\n", page_addr);
> > +        }
> > +    }
> > +}
> > +
> >   /* called from signal handler: invalidate the code and unprotect
> > the
> >    * page. Return 0 if the fault was not handled, 1 if it was
> > handled,
> >    * and 2 if it was handled but the caller must cause the TB to be
> > diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
> > index c53a7f8e44..bfbe7d7ccf 100644
> > --- a/accel/tcg/translator.c
> > +++ b/accel/tcg/translator.c
> > @@ -14,6 +14,7 @@
> >   #include "exec/exec-all.h"
> >   #include "exec/gen-icount.h"
> >   #include "exec/log.h"
> > +#include "exec/translate-all.h"
> >   #include "exec/translator.h"
> >   #include "exec/plugin-gen.h"
> >   #include "sysemu/replay.h"
> > @@ -47,6 +48,10 @@ void translator_loop(const TranslatorOps *ops,
> > DisasContextBase *db,
> >   {
> >       uint32_t cflags = tb_cflags(tb);
> >       bool plugin_enabled;
> > +    bool stop = false;
> > +#ifdef CONFIG_USER_ONLY
> > +    target_ulong page_addr = -1;
> > +#endif
> >   
> >       /* Initialize DisasContext */
> >       db->tb = tb;
> > @@ -71,6 +76,21 @@ void translator_loop(const TranslatorOps *ops,
> > DisasContextBase *db,
> >       plugin_enabled = plugin_gen_tb_start(cpu, tb, cflags &
> > CF_MEMI_ONLY);
> >   
> >       while (true) {
> > +#ifdef CONFIG_USER_ONLY
> > +        /*
> > +         * Make the page containing the next instruction non-
> > writable in order
> > +         * to get a consistent translation if another thread is
> > modifying the
> > +         * code while translate_insn() fetches the instruction
> > bytes piecemeal.
> > +         * Writer threads will wait for mmap_lock() in
> > page_unprotect().
> > +         */
> > +        if ((db->pc_next & TARGET_PAGE_MASK) != page_addr) {
> > +            page_addr = db->pc_next & TARGET_PAGE_MASK;
> > +            page_protect(page_addr);
> > +        }
> > +#endif
> > +        if (stop) {
> > +            break;
> > +        }

Thanks, moving protection to just before instruction byte loads makes
perfect sense. I have just one question.

> So... I think this isn't quite right.
> 
> (1) If we stop exactly at the page break, this protects the *next*
> page unnecessarily.
> 
> (2) This only protects the page of the start of the insn.  If the
> instruction crosses the 
> page boundary, then the latter part of the insn is still victim to
> the race we're trying 
> to fix.
> 
> I think a protect needs to happen in translator_ld*_swap, before
> reading the data.
> 
> I think that the translator_ld*_swap functions should be moved out of
> include/exec/translator.h into accel/tcg/translator.c.

Do we really need this? In the end, the added code is not that large.

> I think that the translator_ld* functions should add a
> DisasContextBase argument, which 
> should then contain the cache for the protection.  This will be a
> moderately large change, 
> but it should be mostly mechanical.
> 
> I think that we should initialize the protection cache before
> translating the first insn, 
> outside of that loop.  This will mean that you need not check for two
> pages 
> simultaneously, when a single read crosses the page boundary.  You'll
> know that (at 
> minimum) the first byte of the first read is already covered, and
> only need to check the 
> last byte of each subsequent read.  I think the value you use for
> your cache should be the 
> end of the page for which protection is known to apply, so that the
> check reduces to
> 
>    end = pc + len - 1;
>    if (end > dcbase->page_protect_end) {
>      dcbase->page_protect_end = end | ~TARGET_PAGE_MASK;
>      page_protect(end);
>    }
> 
> 
> r~



  reply	other threads:[~2021-08-05 10:57 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-04 22:46 [PATCH RFC 0/1] accel/tcg: Clear PAGE_WRITE before translation Ilya Leoshkevich
2021-08-04 22:46 ` [PATCH RFC 1/1] " Ilya Leoshkevich
2021-08-05  0:30   ` Richard Henderson
2021-08-05 10:56     ` Ilya Leoshkevich [this message]
2021-08-05 16:59       ` Richard Henderson
2021-08-05 20:36         ` Ilya Leoshkevich

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=8481e2b62d5bf3295ffb309ce54fbdaf42fcef0b.camel@linux.ibm.com \
    --to=iii@linux.ibm.com \
    --cc=alex.bennee@linaro.org \
    --cc=borntraeger@de.ibm.com \
    --cc=dgilbert@redhat.com \
    --cc=krebbel@linux.ibm.com \
    --cc=pbonzini@redhat.com \
    --cc=peter.maydell@linaro.org \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --cc=stefanha@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).