qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Joelle van Dyne <j@getutm.app>
To: Richard Henderson <richard.henderson@linaro.org>
Cc: QEMU Developers <qemu-devel@nongnu.org>
Subject: Re: [PATCH v2 01/19] tcg: Enhance flush_icache_range with separate data pointer
Date: Sat, 31 Oct 2020 23:54:42 -0700	[thread overview]
Message-ID: <CA+E+eSCn=NjEWrGngmgQtk+robNxL01Ksu7T6nBUNXEg90yvsQ@mail.gmail.com> (raw)
In-Reply-To: <20201030004921.721096-2-richard.henderson@linaro.org>

s->code_ptr and s->code_buf are 4 byte pointers on aarch64 so the
cache flush is off by a factor of 4

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 44b923f5fe..2c4b66965b 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -4325,7 +4325,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)

     /* flush instruction cache */
     flush_idcache_range((uintptr_t)tcg_mirror_rw_to_rx(s->code_buf),
-                        (uintptr_t)s->code_buf, s->code_ptr - s->code_buf);
+                        (uintptr_t)s->code_buf,
+                        (uintptr_t)s->code_ptr - (uintptr_t)s->code_buf);

     return tcg_current_code_size(s);
 }

With this and the other changes, split JIT works fine on iOS.

-j

On Thu, Oct 29, 2020 at 5:49 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> We are shortly going to have a split rw/rx jit buffer.  Depending
> on the host, we need to flush the dcache at the rw data pointer and
> flush the icache at the rx code pointer.
>
> For now, the two passed pointers are identical, so there is no
> effective change in behaviour.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/aarch64/tcg-target.h     |  9 +++++++--
>  tcg/arm/tcg-target.h         |  8 ++++++--
>  tcg/i386/tcg-target.h        |  3 ++-
>  tcg/mips/tcg-target.h        |  8 ++++++--
>  tcg/ppc/tcg-target.h         |  2 +-
>  tcg/riscv/tcg-target.h       |  8 ++++++--
>  tcg/s390/tcg-target.h        |  3 ++-
>  tcg/sparc/tcg-target.h       |  8 +++++---
>  tcg/tci/tcg-target.h         |  3 ++-
>  softmmu/physmem.c            |  9 ++++++++-
>  tcg/tcg.c                    |  5 +++--
>  tcg/aarch64/tcg-target.c.inc |  2 +-
>  tcg/mips/tcg-target.c.inc    |  2 +-
>  tcg/ppc/tcg-target.c.inc     | 21 +++++++++++----------
>  tcg/sparc/tcg-target.c.inc   |  4 ++--
>  15 files changed, 63 insertions(+), 32 deletions(-)
>
> diff --git a/tcg/aarch64/tcg-target.h b/tcg/aarch64/tcg-target.h
> index 663dd0b95e..d0a6a059b7 100644
> --- a/tcg/aarch64/tcg-target.h
> +++ b/tcg/aarch64/tcg-target.h
> @@ -148,9 +148,14 @@ typedef enum {
>  #define TCG_TARGET_DEFAULT_MO (0)
>  #define TCG_TARGET_HAS_MEMORY_BSWAP     1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
>  {
> -    __builtin___clear_cache((char *)start, (char *)stop);
> +    /* TODO: Copy this from gcc to avoid 4 loops instead of 2. */
> +    if (rw != rx) {
> +        __builtin___clear_cache((char *)rw, (char *)(rw + len));
> +    }
> +    __builtin___clear_cache((char *)rx, (char *)(rx + len));
>  }
>
>  void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
> diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
> index 17e771374d..fa88b24e43 100644
> --- a/tcg/arm/tcg-target.h
> +++ b/tcg/arm/tcg-target.h
> @@ -134,9 +134,13 @@ enum {
>  #define TCG_TARGET_DEFAULT_MO (0)
>  #define TCG_TARGET_HAS_MEMORY_BSWAP     1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
>  {
> -    __builtin___clear_cache((char *) start, (char *) stop);
> +    if (rw != rx) {
> +        __builtin___clear_cache((char *)rw, (char *)(rw + len));
> +    }
> +    __builtin___clear_cache((char *)rx, (char *)(rx + len));
>  }
>
>  /* not defined -- call should be eliminated at compile time */
> diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
> index abd4ac7fc0..8323e72639 100644
> --- a/tcg/i386/tcg-target.h
> +++ b/tcg/i386/tcg-target.h
> @@ -206,7 +206,8 @@ extern bool have_avx2;
>  #define TCG_TARGET_extract_i64_valid(ofs, len) \
>      (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
>  {
>  }
>
> diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
> index c6b091d849..47b1226ee9 100644
> --- a/tcg/mips/tcg-target.h
> +++ b/tcg/mips/tcg-target.h
> @@ -207,9 +207,13 @@ extern bool use_mips32r2_instructions;
>  #define TCG_TARGET_DEFAULT_MO (0)
>  #define TCG_TARGET_HAS_MEMORY_BSWAP     1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
>  {
> -    cacheflush ((void *)start, stop-start, ICACHE);
> +    if (rx != rw) {
> +        cacheflush((void *)rw, len, DCACHE);
> +    }
> +    cacheflush((void *)rx, len, ICACHE);
>  }
>
>  void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
> diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
> index be10363956..fbb6dc1b47 100644
> --- a/tcg/ppc/tcg-target.h
> +++ b/tcg/ppc/tcg-target.h
> @@ -175,7 +175,7 @@ extern bool have_vsx;
>  #define TCG_TARGET_HAS_bitsel_vec       have_vsx
>  #define TCG_TARGET_HAS_cmpsel_vec       0
>
> -void flush_icache_range(uintptr_t start, uintptr_t stop);
> +void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len);
>  void tb_target_set_jmp_target(uintptr_t, uintptr_t, uintptr_t);
>
>  #define TCG_TARGET_DEFAULT_MO (0)
> diff --git a/tcg/riscv/tcg-target.h b/tcg/riscv/tcg-target.h
> index 032439d806..0fa6ae358e 100644
> --- a/tcg/riscv/tcg-target.h
> +++ b/tcg/riscv/tcg-target.h
> @@ -159,9 +159,13 @@ typedef enum {
>  #define TCG_TARGET_HAS_mulsh_i64        1
>  #endif
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
>  {
> -    __builtin___clear_cache((char *)start, (char *)stop);
> +    if (rx != rw) {
> +        __builtin___clear_cache((char *)rw, (char *)(rw + len));
> +    }
> +    __builtin___clear_cache((char *)rx, (char *)(rx + len));
>  }
>
>  /* not defined -- call should be eliminated at compile time */
> diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
> index 63c8797bd3..c3dc2e8938 100644
> --- a/tcg/s390/tcg-target.h
> +++ b/tcg/s390/tcg-target.h
> @@ -145,7 +145,8 @@ enum {
>      TCG_AREG0 = TCG_REG_R10,
>  };
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
>  {
>  }
>
> diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
> index 633841ebf2..c27c40231e 100644
> --- a/tcg/sparc/tcg-target.h
> +++ b/tcg/sparc/tcg-target.h
> @@ -168,10 +168,12 @@ extern bool use_vis3_instructions;
>  #define TCG_TARGET_DEFAULT_MO (0)
>  #define TCG_TARGET_HAS_MEMORY_BSWAP     1
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
>  {
> -    uintptr_t p;
> -    for (p = start & -8; p < ((stop + 7) & -8); p += 8) {
> +    /* No additional data flush to the RW virtual address required. */
> +    uintptr_t p, end = (rx + len + 7) & -8;
> +    for (p = rx & -8; p < end; p += 8) {
>          __asm__ __volatile__("flush\t%0" : : "r" (p));
>      }
>  }
> diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
> index 8c1c1d265d..6460449719 100644
> --- a/tcg/tci/tcg-target.h
> +++ b/tcg/tci/tcg-target.h
> @@ -191,7 +191,8 @@ void tci_disas(uint8_t opc);
>
>  #define HAVE_TCG_QEMU_TB_EXEC
>
> -static inline void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +static inline void flush_idcache_range(uintptr_t rx, uintptr_t rw, size_t len)
>  {
>  }
>
> diff --git a/softmmu/physmem.c b/softmmu/physmem.c
> index a9adedb9f8..b23c1fef54 100644
> --- a/softmmu/physmem.c
> +++ b/softmmu/physmem.c
> @@ -2954,7 +2954,14 @@ static inline MemTxResult address_space_write_rom_internal(AddressSpace *as,
>                  invalidate_and_set_dirty(mr, addr1, l);
>                  break;
>              case FLUSH_CACHE:
> -                flush_icache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr + l);
> +                /*
> +                 * FIXME: This function is currently located in tcg/host/,
> +                 * but we never come here when tcg is enabled; only for
> +                 * real hardware acceleration.  This can actively fail
> +                 * when TCI is configured, since that function is a nop.
> +                 * We should move this to util/ or something.
> +                 */
> +                flush_idcache_range((uintptr_t)ram_ptr, (uintptr_t)ram_ptr, l);
>                  break;
>              }
>          }
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index a8c28440e2..3bf36e0cfe 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -1076,7 +1076,7 @@ void tcg_prologue_init(TCGContext *s)
>  #endif
>
>      buf1 = s->code_ptr;
> -    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
> +    flush_idcache_range((uintptr_t)buf0, (uintptr_t)buf0, buf1 - buf0);
>
>      /* Deduct the prologue from the buffer.  */
>      prologue_size = tcg_current_code_size(s);
> @@ -4268,7 +4268,8 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
>      }
>
>      /* flush instruction cache */
> -    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
> +    flush_idcache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_buf,
> +                        s->code_ptr - s->code_buf);
>
>      return tcg_current_code_size(s);
>  }
> diff --git a/tcg/aarch64/tcg-target.c.inc b/tcg/aarch64/tcg-target.c.inc
> index 26f71cb599..83af3108a4 100644
> --- a/tcg/aarch64/tcg-target.c.inc
> +++ b/tcg/aarch64/tcg-target.c.inc
> @@ -1363,7 +1363,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
>      }
>      pair = (uint64_t)i2 << 32 | i1;
>      qatomic_set((uint64_t *)jmp_addr, pair);
> -    flush_icache_range(jmp_addr, jmp_addr + 8);
> +    flush_idcache_range(jmp_addr, jmp_addr, 8);
>  }
>
>  static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
> diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc
> index 41be574e89..c255ecb444 100644
> --- a/tcg/mips/tcg-target.c.inc
> +++ b/tcg/mips/tcg-target.c.inc
> @@ -2660,7 +2660,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
>                                uintptr_t addr)
>  {
>      qatomic_set((uint32_t *)jmp_addr, deposit32(OPC_J, 0, 26, addr >> 2));
> -    flush_icache_range(jmp_addr, jmp_addr + 4);
> +    flush_idcache_range(jmp_addr, jmp_addr, 4);
>  }
>
>  typedef struct {
> diff --git a/tcg/ppc/tcg-target.c.inc b/tcg/ppc/tcg-target.c.inc
> index 18ee989f95..a848e98383 100644
> --- a/tcg/ppc/tcg-target.c.inc
> +++ b/tcg/ppc/tcg-target.c.inc
> @@ -1753,12 +1753,12 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
>          /* As per the enclosing if, this is ppc64.  Avoid the _Static_assert
>             within qatomic_set that would fail to build a ppc32 host.  */
>          qatomic_set__nocheck((uint64_t *)jmp_addr, pair);
> -        flush_icache_range(jmp_addr, jmp_addr + 8);
> +        flush_idcache_range(jmp_addr, jmp_addr, 8);
>      } else {
>          intptr_t diff = addr - jmp_addr;
>          tcg_debug_assert(in_range_b(diff));
>          qatomic_set((uint32_t *)jmp_addr, B | (diff & 0x3fffffc));
> -        flush_icache_range(jmp_addr, jmp_addr + 4);
> +        flush_idcache_range(jmp_addr, jmp_addr, 4);
>      }
>  }
>
> @@ -3864,22 +3864,23 @@ void tcg_register_jit(void *buf, size_t buf_size)
>  }
>  #endif /* __ELF__ */
>
> -void flush_icache_range(uintptr_t start, uintptr_t stop)
> +/* Flush the dcache at RW, and the icache at RX, as necessary. */
> +void flush_idcache_range(uintptr_t rx, uintptr_t rw, uintptr_t len)
>  {
> -    uintptr_t p, start1, stop1;
> +    uintptr_t p, start, stop;
>      size_t dsize = qemu_dcache_linesize;
>      size_t isize = qemu_icache_linesize;
>
> -    start1 = start & ~(dsize - 1);
> -    stop1 = (stop + dsize - 1) & ~(dsize - 1);
> -    for (p = start1; p < stop1; p += dsize) {
> +    start = rw & ~(dsize - 1);
> +    stop = (rw + len + dsize - 1) & ~(dsize - 1);
> +    for (p = start; p < stop; p += dsize) {
>          asm volatile ("dcbst 0,%0" : : "r"(p) : "memory");
>      }
>      asm volatile ("sync" : : : "memory");
>
> -    start &= start & ~(isize - 1);
> -    stop1 = (stop + isize - 1) & ~(isize - 1);
> -    for (p = start1; p < stop1; p += isize) {
> +    start = rx & ~(isize - 1);
> +    stop = (rx + len + isize - 1) & ~(isize - 1);
> +    for (p = start; p < stop; p += isize) {
>          asm volatile ("icbi 0,%0" : : "r"(p) : "memory");
>      }
>      asm volatile ("sync" : : : "memory");
> diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc
> index 6775bd30fc..6e2d755f6a 100644
> --- a/tcg/sparc/tcg-target.c.inc
> +++ b/tcg/sparc/tcg-target.c.inc
> @@ -1836,7 +1836,7 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
>      if (!USE_REG_TB) {
>          qatomic_set((uint32_t *)jmp_addr,
>                     deposit32(CALL, 0, 30, br_disp >> 2));
> -        flush_icache_range(jmp_addr, jmp_addr + 4);
> +        flush_idcache_range(jmp_addr, jmp_addr, 4);
>          return;
>      }
>
> @@ -1860,5 +1860,5 @@ void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
>      }
>
>      qatomic_set((uint64_t *)jmp_addr, deposit64(i2, 32, 32, i1));
> -    flush_icache_range(jmp_addr, jmp_addr + 8);
> +    flush_idcache_range(jmp_addr, jmp_addr, 8);
>  }
> --
> 2.25.1
>


  reply	other threads:[~2020-11-01  6:56 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-10-30  0:49 [PATCH v2 00/19] Mirror map JIT memory for TCG Richard Henderson
2020-10-30  0:49 ` [PATCH v2 01/19] tcg: Enhance flush_icache_range with separate data pointer Richard Henderson
2020-11-01  6:54   ` Joelle van Dyne [this message]
2020-11-03 23:02     ` Richard Henderson
2020-10-30  0:49 ` [PATCH v2 02/19] tcg: Move tcg prologue pointer out of TCGContext Richard Henderson
2020-10-30  0:49 ` [PATCH v2 03/19] tcg: Move tcg epilogue " Richard Henderson
2020-10-30  0:49 ` [PATCH v2 04/19] tcg: Introduce tcg_mirror_rw_to_rx/tcg_mirror_rx_to_rw Richard Henderson
2020-10-30  0:49 ` [PATCH v2 05/19] tcg: Adjust tcg_out_call for const Richard Henderson
2020-10-30  0:49 ` [PATCH v2 06/19] tcg: Adjust tcg_out_label " Richard Henderson
2020-10-30  0:49 ` [PATCH v2 07/19] tcg: Adjust tcg_register_jit " Richard Henderson
2020-10-30  0:49 ` [PATCH v2 08/19] tcg: Adjust tb_target_set_jmp_target for split rwx Richard Henderson
2020-10-30  0:49 ` [PATCH v2 09/19] tcg: Make DisasContextBase.tb const Richard Henderson
2020-10-30  0:49 ` [PATCH v2 10/19] tcg: Make tb arg to synchronize_from_tb const Richard Henderson
2020-10-30  0:49 ` [PATCH v2 11/19] tcg: Use Error with alloc_code_gen_buffer Richard Henderson
2020-10-30  0:49 ` [PATCH v2 12/19] tcg: Add --accel tcg,split-rwx property Richard Henderson
2020-10-30  0:49 ` [PATCH v2 13/19] accel/tcg: Support split-rwx for linux with memfd Richard Henderson
2020-10-30  0:49 ` [PATCH v2 14/19] RFC: accel/tcg: Support split-rwx for darwin/iOS with vm_remap Richard Henderson
2020-11-01  1:42   ` Joelle van Dyne
2020-11-01 21:11     ` Joelle van Dyne
2020-10-30  0:49 ` [PATCH v2 15/19] tcg: Return the rx mirror of TranslationBlock from exit_tb Richard Henderson
2020-10-30  0:49 ` [PATCH v2 16/19] tcg/i386: Support split-rwx code generation Richard Henderson
2020-10-30  0:49 ` [PATCH v2 17/19] tcg/aarch64: Use B not BL for tcg_out_goto_long Richard Henderson
2020-10-30  0:49 ` [PATCH v2 18/19] tcg/aarch64: Implement flush_idcache_range manually Richard Henderson
2020-11-01  1:25   ` Joelle van Dyne
2020-11-01 15:09     ` Richard Henderson
2020-11-03 23:08     ` Richard Henderson
2020-10-30  0:49 ` [PATCH v2 19/19] tcg/aarch64: Support split-rwx code generation Richard Henderson
2020-10-30  1:27 ` [PATCH v2 00/19] Mirror map JIT memory for TCG no-reply
2020-10-30 18:26 ` Paolo Bonzini
2020-10-30 18:57   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to='CA+E+eSCn=NjEWrGngmgQtk+robNxL01Ksu7T6nBUNXEg90yvsQ@mail.gmail.com' \
    --to=j@getutm.app \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).